
HADOOP-5531. Removed Chukwa from Hadoop 0.20 branch

git-svn-id: https://svn.apache.org/repos/asf/hadoop/core/branches/branch-0.20@757648 13f79535-47bb-0310-9956-ffa450edef68
Author: Nigel Daley (16 years ago)
Parent commit: 14008cf6a7
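For reference, a minimal sketch of how this change could be inspected against the Subversion revision recorded in the git-svn-id line above, assuming a Subversion client and that the ASF repository URL is still reachable (the revision number 757648 comes from that line; nothing else here is taken from the commit itself):

# Hypothetical inspection commands; r757648 is the revision from the git-svn-id above.
svn log -v -r 757648 https://svn.apache.org/repos/asf/hadoop/core/branches/branch-0.20
svn diff -r 757647:757648 https://svn.apache.org/repos/asf/hadoop/core/branches/branch-0.20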
100 changed files with 2 additions and 8743 deletions
  1. 2 0
      CHANGES.txt
  2. 0 1
      src/contrib/build.xml
  3. 0 25
      src/contrib/chukwa/CHANGES.txt
  4. 0 202
      src/contrib/chukwa/LICENSE.txt
  5. 0 24
      src/contrib/chukwa/bin/README
  6. 0 1
      src/contrib/chukwa/bin/VERSION
  7. 0 36
      src/contrib/chukwa/bin/agent.sh
  8. 0 64
      src/contrib/chukwa/bin/buildDailyArchive.sh
  9. 0 64
      src/contrib/chukwa/bin/buildHourlyArchive.sh
  10. 0 35
      src/contrib/chukwa/bin/chukwa
  11. 0 131
      src/contrib/chukwa/bin/chukwa-config.sh
  12. 0 191
      src/contrib/chukwa/bin/chukwa-daemon.sh
  13. 0 38
      src/contrib/chukwa/bin/chukwa-daemons.sh
  14. 0 32
      src/contrib/chukwa/bin/dailyRolling.sh
  15. 0 62
      src/contrib/chukwa/bin/dbAdmin.sh
  16. 0 25
      src/contrib/chukwa/bin/dbLoader.sh
  17. 0 32
      src/contrib/chukwa/bin/dbSetup.sh
  18. 0 24
      src/contrib/chukwa/bin/dumpArchive.sh
  19. 0 24
      src/contrib/chukwa/bin/dumpDataType.sh
  20. 0 24
      src/contrib/chukwa/bin/dumpRecord.sh
  21. 0 28
      src/contrib/chukwa/bin/hourlyRolling.sh
  22. 0 37
      src/contrib/chukwa/bin/jettyCollector.sh
  23. 0 59
      src/contrib/chukwa/bin/jobhisttailstarter.sh
  24. 0 2
      src/contrib/chukwa/bin/netstat.sh
  25. 0 46
      src/contrib/chukwa/bin/nodeActivityDataLoader.sh
  26. 0 155
      src/contrib/chukwa/bin/processSinkFiles.sh
  27. 0 0
      src/contrib/chukwa/bin/shutdown.sh
  28. 0 68
      src/contrib/chukwa/bin/slaves.sh
  29. 0 31
      src/contrib/chukwa/bin/start-agents.sh
  30. 0 36
      src/contrib/chukwa/bin/start-all.sh
  31. 0 31
      src/contrib/chukwa/bin/start-collectors.sh
  32. 0 39
      src/contrib/chukwa/bin/start-data-processors.sh
  33. 0 39
      src/contrib/chukwa/bin/start-probes.sh
  34. 0 0
      src/contrib/chukwa/bin/startup.sh
  35. 0 26
      src/contrib/chukwa/bin/stop-agents.sh
  36. 0 29
      src/contrib/chukwa/bin/stop-all.sh
  37. 0 26
      src/contrib/chukwa/bin/stop-collectors.sh
  38. 0 75
      src/contrib/chukwa/bin/stop-data-processors.sh
  39. 0 28
      src/contrib/chukwa/bin/stop-probes.sh
  40. 0 141
      src/contrib/chukwa/bin/systemDataLoader.sh
  41. 0 48
      src/contrib/chukwa/bin/torqueDataLoader.sh
  42. 0 48
      src/contrib/chukwa/bin/validateDemux.sh
  43. 0 147
      src/contrib/chukwa/bin/watchdog.sh
  44. 0 851
      src/contrib/chukwa/build.xml
  45. 0 18
      src/contrib/chukwa/conf/README
  46. 0 88
      src/contrib/chukwa/conf/aggregator.sql
  47. 0 1
      src/contrib/chukwa/conf/alert.conf.template
  48. 0 27
      src/contrib/chukwa/conf/chukwa-agent-conf.xml
  49. 0 62
      src/contrib/chukwa/conf/chukwa-agent-conf.xml.template
  50. 0 1
      src/contrib/chukwa/conf/chukwa-agents.template
  51. 0 32
      src/contrib/chukwa/conf/chukwa-collector-conf.xml
  52. 0 32
      src/contrib/chukwa/conf/chukwa-collector-conf.xml.template
  53. 0 107
      src/contrib/chukwa/conf/chukwa-demux-conf.xml
  54. 0 41
      src/contrib/chukwa/conf/chukwa-env.sh
  55. 0 66
      src/contrib/chukwa/conf/chukwa-env.sh.template
  56. 0 31
      src/contrib/chukwa/conf/chukwa-hadoop-metrics-log4j.properties
  57. 0 19
      src/contrib/chukwa/conf/chukwa-log4j.properties
  58. 0 1
      src/contrib/chukwa/conf/collectors
  59. 0 1
      src/contrib/chukwa/conf/collectors.template
  60. 0 7
      src/contrib/chukwa/conf/commons-logging.properties
  61. 0 604
      src/contrib/chukwa/conf/database_create_tables
  62. 0 43
      src/contrib/chukwa/conf/fields.spec
  63. 0 111
      src/contrib/chukwa/conf/hadoop-log4j.properties
  64. 0 11
      src/contrib/chukwa/conf/hadoop-metrics.properties
  65. 0 1
      src/contrib/chukwa/conf/initial_adaptors.template
  66. 0 1
      src/contrib/chukwa/conf/jdbc.conf
  67. 0 1
      src/contrib/chukwa/conf/jdbc.conf.template
  68. 0 8
      src/contrib/chukwa/conf/joblog.properties
  69. 0 12
      src/contrib/chukwa/conf/log4j.properties
  70. 0 1263
      src/contrib/chukwa/conf/mdl.xml.template
  71. 0 0
      src/contrib/chukwa/conf/nodeActivity.properties
  72. 0 0
      src/contrib/chukwa/conf/queueinfo.properties
  73. 0 13
      src/contrib/chukwa/conf/system-data-loader.properties
  74. 0 0
      src/contrib/chukwa/conf/torque.properties
  75. 0 0
      src/contrib/chukwa/conf/util.properties
  76. 0 19
      src/contrib/chukwa/default.properties
  77. 0 106
      src/contrib/chukwa/docs/README
  78. BIN
      src/contrib/chukwa/docs/paper/chukwa0.jpg
  79. BIN
      src/contrib/chukwa/docs/paper/chukwa1.jpg
  80. 0 304
      src/contrib/chukwa/docs/paper/chukwa_08.tex
  81. BIN
      src/contrib/chukwa/docs/paper/hicc_in_action2.png
  82. 0 94
      src/contrib/chukwa/docs/paper/usenix.sty
  83. BIN
      src/contrib/chukwa/docs/paper/widget.png
  84. BIN
      src/contrib/chukwa/hadoopjars/hadoop-0.18.0-core.jar
  85. 0 97
      src/contrib/chukwa/ivy.xml
  86. 0 63
      src/contrib/chukwa/ivy/ivysettings.xml
  87. 0 32
      src/contrib/chukwa/ivy/libraries.properties
  88. 0 9
      src/contrib/chukwa/lib/json-LICENSE.txt
  89. 0 563
      src/contrib/chukwa/lib/json-README.txt
  90. BIN
      src/contrib/chukwa/lib/json.jar
  91. 0 286
      src/contrib/chukwa/src/java/org/apache/hadoop/chukwa/ChukwaArchiveKey.java
  92. 0 108
      src/contrib/chukwa/src/java/org/apache/hadoop/chukwa/Chunk.java
  93. 0 63
      src/contrib/chukwa/src/java/org/apache/hadoop/chukwa/ChunkBuilder.java
  94. 0 266
      src/contrib/chukwa/src/java/org/apache/hadoop/chukwa/ChunkImpl.java
  95. 0 64
      src/contrib/chukwa/src/java/org/apache/hadoop/chukwa/conf/ChukwaConfiguration.java
  96. 0 277
      src/contrib/chukwa/src/java/org/apache/hadoop/chukwa/database/Aggregator.java
  97. 0 256
      src/contrib/chukwa/src/java/org/apache/hadoop/chukwa/database/Consolidator.java
  98. 0 106
      src/contrib/chukwa/src/java/org/apache/hadoop/chukwa/database/DataExpiration.java
  99. 0 244
      src/contrib/chukwa/src/java/org/apache/hadoop/chukwa/database/DatabaseConfig.java
  100. 0 159
      src/contrib/chukwa/src/java/org/apache/hadoop/chukwa/database/MetricsAggregation.java

+ 2 - 0
CHANGES.txt

@@ -75,6 +75,8 @@ Release 0.20.0 - Unreleased
     to be only group readable instead of world readable.
     (Amareshwari Sriramadasu via yhemanth)
 
+    HADOOP-5531. Removed Chukwa from Hadoop 0.20.0. (nigel)
+
   NEW FEATURES
 
     HADOOP-4575. Add a proxy service for relaying HsftpFileSystem requests.

+ 0 - 1
src/contrib/build.xml

@@ -50,7 +50,6 @@
       <fileset dir="." includes="streaming/build.xml"/>
       <fileset dir="." includes="fairscheduler/build.xml"/>
       <fileset dir="." includes="capacity-scheduler/build.xml"/>
-      <fileset dir="." includes="chukwa/build.xml"/>
     </subant>
   </target>
   

+ 0 - 25
src/contrib/chukwa/CHANGES.txt

@@ -1,25 +0,0 @@
-Trunk (unreleased changes)
-
-  INCOMPATIBLE CHANGES
-
-  NEW FEATURES
-
-  IMPROVEMENTS
-
-    HADOOP-4431. Add versionning/tags to Chukwa Chunk. 
-    (Jerome Boulon via Johan)
-
-    HADOOP-4433. Improve data loader for collecting metrics and log files.
-    (Eric Yang via omalley)
-
-  OPTIMIZATIONS
-
-  BUG FIXES
-
-Release 0.19.0 - Unreleased
-
-  NEW FEATURES
-
-    HADOOP-3719. Initial checkin of Chukwa, which is a data collection and 
-    analysis framework. (Jerome Boulon, Andy Konwinski, Ari Rabkin, 
-    and Eric Yang)

+ 0 - 202
src/contrib/chukwa/LICENSE.txt

@@ -1,202 +0,0 @@
-
-                                 Apache License
-                           Version 2.0, January 2004
-                        http://www.apache.org/licenses/
-
-   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
-
-   1. Definitions.
-
-      "License" shall mean the terms and conditions for use, reproduction,
-      and distribution as defined by Sections 1 through 9 of this document.
-
-      "Licensor" shall mean the copyright owner or entity authorized by
-      the copyright owner that is granting the License.
-
-      "Legal Entity" shall mean the union of the acting entity and all
-      other entities that control, are controlled by, or are under common
-      control with that entity. For the purposes of this definition,
-      "control" means (i) the power, direct or indirect, to cause the
-      direction or management of such entity, whether by contract or
-      otherwise, or (ii) ownership of fifty percent (50%) or more of the
-      outstanding shares, or (iii) beneficial ownership of such entity.
-
-      "You" (or "Your") shall mean an individual or Legal Entity
-      exercising permissions granted by this License.
-
-      "Source" form shall mean the preferred form for making modifications,
-      including but not limited to software source code, documentation
-      source, and configuration files.
-
-      "Object" form shall mean any form resulting from mechanical
-      transformation or translation of a Source form, including but
-      not limited to compiled object code, generated documentation,
-      and conversions to other media types.
-
-      "Work" shall mean the work of authorship, whether in Source or
-      Object form, made available under the License, as indicated by a
-      copyright notice that is included in or attached to the work
-      (an example is provided in the Appendix below).
-
-      "Derivative Works" shall mean any work, whether in Source or Object
-      form, that is based on (or derived from) the Work and for which the
-      editorial revisions, annotations, elaborations, or other modifications
-      represent, as a whole, an original work of authorship. For the purposes
-      of this License, Derivative Works shall not include works that remain
-      separable from, or merely link (or bind by name) to the interfaces of,
-      the Work and Derivative Works thereof.
-
-      "Contribution" shall mean any work of authorship, including
-      the original version of the Work and any modifications or additions
-      to that Work or Derivative Works thereof, that is intentionally
-      submitted to Licensor for inclusion in the Work by the copyright owner
-      or by an individual or Legal Entity authorized to submit on behalf of
-      the copyright owner. For the purposes of this definition, "submitted"
-      means any form of electronic, verbal, or written communication sent
-      to the Licensor or its representatives, including but not limited to
-      communication on electronic mailing lists, source code control systems,
-      and issue tracking systems that are managed by, or on behalf of, the
-      Licensor for the purpose of discussing and improving the Work, but
-      excluding communication that is conspicuously marked or otherwise
-      designated in writing by the copyright owner as "Not a Contribution."
-
-      "Contributor" shall mean Licensor and any individual or Legal Entity
-      on behalf of whom a Contribution has been received by Licensor and
-      subsequently incorporated within the Work.
-
-   2. Grant of Copyright License. Subject to the terms and conditions of
-      this License, each Contributor hereby grants to You a perpetual,
-      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
-      copyright license to reproduce, prepare Derivative Works of,
-      publicly display, publicly perform, sublicense, and distribute the
-      Work and such Derivative Works in Source or Object form.
-
-   3. Grant of Patent License. Subject to the terms and conditions of
-      this License, each Contributor hereby grants to You a perpetual,
-      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
-      (except as stated in this section) patent license to make, have made,
-      use, offer to sell, sell, import, and otherwise transfer the Work,
-      where such license applies only to those patent claims licensable
-      by such Contributor that are necessarily infringed by their
-      Contribution(s) alone or by combination of their Contribution(s)
-      with the Work to which such Contribution(s) was submitted. If You
-      institute patent litigation against any entity (including a
-      cross-claim or counterclaim in a lawsuit) alleging that the Work
-      or a Contribution incorporated within the Work constitutes direct
-      or contributory patent infringement, then any patent licenses
-      granted to You under this License for that Work shall terminate
-      as of the date such litigation is filed.
-
-   4. Redistribution. You may reproduce and distribute copies of the
-      Work or Derivative Works thereof in any medium, with or without
-      modifications, and in Source or Object form, provided that You
-      meet the following conditions:
-
-      (a) You must give any other recipients of the Work or
-          Derivative Works a copy of this License; and
-
-      (b) You must cause any modified files to carry prominent notices
-          stating that You changed the files; and
-
-      (c) You must retain, in the Source form of any Derivative Works
-          that You distribute, all copyright, patent, trademark, and
-          attribution notices from the Source form of the Work,
-          excluding those notices that do not pertain to any part of
-          the Derivative Works; and
-
-      (d) If the Work includes a "NOTICE" text file as part of its
-          distribution, then any Derivative Works that You distribute must
-          include a readable copy of the attribution notices contained
-          within such NOTICE file, excluding those notices that do not
-          pertain to any part of the Derivative Works, in at least one
-          of the following places: within a NOTICE text file distributed
-          as part of the Derivative Works; within the Source form or
-          documentation, if provided along with the Derivative Works; or,
-          within a display generated by the Derivative Works, if and
-          wherever such third-party notices normally appear. The contents
-          of the NOTICE file are for informational purposes only and
-          do not modify the License. You may add Your own attribution
-          notices within Derivative Works that You distribute, alongside
-          or as an addendum to the NOTICE text from the Work, provided
-          that such additional attribution notices cannot be construed
-          as modifying the License.
-
-      You may add Your own copyright statement to Your modifications and
-      may provide additional or different license terms and conditions
-      for use, reproduction, or distribution of Your modifications, or
-      for any such Derivative Works as a whole, provided Your use,
-      reproduction, and distribution of the Work otherwise complies with
-      the conditions stated in this License.
-
-   5. Submission of Contributions. Unless You explicitly state otherwise,
-      any Contribution intentionally submitted for inclusion in the Work
-      by You to the Licensor shall be under the terms and conditions of
-      this License, without any additional terms or conditions.
-      Notwithstanding the above, nothing herein shall supersede or modify
-      the terms of any separate license agreement you may have executed
-      with Licensor regarding such Contributions.
-
-   6. Trademarks. This License does not grant permission to use the trade
-      names, trademarks, service marks, or product names of the Licensor,
-      except as required for reasonable and customary use in describing the
-      origin of the Work and reproducing the content of the NOTICE file.
-
-   7. Disclaimer of Warranty. Unless required by applicable law or
-      agreed to in writing, Licensor provides the Work (and each
-      Contributor provides its Contributions) on an "AS IS" BASIS,
-      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-      implied, including, without limitation, any warranties or conditions
-      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
-      PARTICULAR PURPOSE. You are solely responsible for determining the
-      appropriateness of using or redistributing the Work and assume any
-      risks associated with Your exercise of permissions under this License.
-
-   8. Limitation of Liability. In no event and under no legal theory,
-      whether in tort (including negligence), contract, or otherwise,
-      unless required by applicable law (such as deliberate and grossly
-      negligent acts) or agreed to in writing, shall any Contributor be
-      liable to You for damages, including any direct, indirect, special,
-      incidental, or consequential damages of any character arising as a
-      result of this License or out of the use or inability to use the
-      Work (including but not limited to damages for loss of goodwill,
-      work stoppage, computer failure or malfunction, or any and all
-      other commercial damages or losses), even if such Contributor
-      has been advised of the possibility of such damages.
-
-   9. Accepting Warranty or Additional Liability. While redistributing
-      the Work or Derivative Works thereof, You may choose to offer,
-      and charge a fee for, acceptance of support, warranty, indemnity,
-      or other liability obligations and/or rights consistent with this
-      License. However, in accepting such obligations, You may act only
-      on Your own behalf and on Your sole responsibility, not on behalf
-      of any other Contributor, and only if You agree to indemnify,
-      defend, and hold each Contributor harmless for any liability
-      incurred by, or claims asserted against, such Contributor by reason
-      of your accepting any such warranty or additional liability.
-
-   END OF TERMS AND CONDITIONS
-
-   APPENDIX: How to apply the Apache License to your work.
-
-      To apply the Apache License to your work, attach the following
-      boilerplate notice, with the fields enclosed by brackets "[]"
-      replaced with your own identifying information. (Don't include
-      the brackets!)  The text should be enclosed in the appropriate
-      comment syntax for the file format. We also recommend that a
-      file or class name and description of purpose be included on the
-      same "printed page" as the copyright notice for easier
-      identification within third-party archives.
-
-   Copyright [yyyy] [name of copyright owner]
-
-   Licensed under the Apache License, Version 2.0 (the "License");
-   you may not use this file except in compliance with the License.
-   You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.

+ 0 - 24
src/contrib/chukwa/bin/README

@@ -1,24 +0,0 @@
-Check for an updated copy of this README at http://wiki.apache.org/hadoop/Chukwa_Startup_and_Shutdown_Scripts
-
-start-all.sh - runs start-collectors.sh, start-agents.sh, start-probes.sh, start-data-processors.sh
-
-start-collectors.sh - start the chukwa collector daemon (jettyCollector.sh) on hosts listed in conf/collectors
-stop-collectors.sh - stop the chukwa collector daemon (jettyCollector.sh) on hosts listed in conf/collectors
-
-jettyCollector.sh - start the chukwa collector daemon on the current host
-
-start-agents.sh - start chukwa agent daemon (agent.sh) on all hosts listed in conf/chukwa-agents
-stop-agents.sh - stop chukwa agent daemon (agent.sh) on all hosts listed in conf/chukwa-agents
-
-agent.sh - start the chukwa agent on the current host
-
-start-probes.sh - runs, in this order, systemDataLoader.sh, torqueDataLoader.sh, nodeActivityDataLoader.sh
-
-systemDataLoader.sh - every 60 seconds run: sar, iostat, top, top, df, netstat. //TODO: figure out where these guys send their output
-torqueDataLoader.sh - //TODO: FILL THIS IN!
-nodeActivityDataLoader.sh - //TODO: FILL THIS IN!
-
-slaves.sh <command command_args ...> - run arbitrary commands on all hosts in conf/slaves
-
-jettycollector.sh - start a jetty based version of the Chukwa collector
-agent.sh - start the chukwa agent on the local machine

+ 0 - 1
src/contrib/chukwa/bin/VERSION

@@ -1 +0,0 @@
-0.1.1

+ 0 - 36
src/contrib/chukwa/bin/agent.sh

@@ -1,36 +0,0 @@
-#!/bin/sh
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-pid=$$
-
-bin=`dirname "$0"`
-bin=`cd "$bin"; pwd`
-
-. "$bin"/chukwa-config.sh
-
-echo "hadoop jar for agent is " ${HADOOP_JAR}
-trap '${JPS} | grep ChukwaAgent | grep -v grep | grep -o "[^ ].*" | cut -f 1 -d" " | xargs kill -TERM ; exit 0' 1 2 15
-
-if [ "X$1" = "Xstop" ]; then
-  echo -n "Shutting down agent..."
-  JETTY_PID=`${JPS} | grep ChukwaAgent | grep -v grep | grep -o "[^ ].*" | cut -f 1 -d" "`
-  kill -TERM ${JETTY_PID} >&/dev/null
-  echo "done"
-  exit 0
-fi
-
-
-${JAVA_HOME}/bin/java -Xms32M -Xmx64M -DAPP=agent -Dlog4j.configuration=chukwa-log4j.properties -DCHUKWA_HOME=${CHUKWA_HOME} -DCHUKWA_CONF_DIR=${CHUKWA_CONF_DIR} -DCHUKWA_LOG_DIR=${CHUKWA_LOG_DIR} -classpath ${CLASSPATH}:${CHUKWA_AGENT}:${CHUKWA_CORE}:${HADOOP_JAR}:${COMMON}:${CHUKWA_CONF_DIR} org.apache.hadoop.chukwa.datacollection.agent.ChukwaAgent $@

+ 0 - 64
src/contrib/chukwa/bin/buildDailyArchive.sh

@@ -1,64 +0,0 @@
-#!/bin/sh
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-pid=$$
-
-bin=`dirname "$0"`
-bin=`cd "$bin"; pwd`
-. "$bin"/chukwa-config.sh
-
-echo "${pid}" > "$CHUKWA_HOME/var/run/buildDailyArchive.pid"
-
-HADOOP_CONF_DIR="${HADOOP_HOME}/conf/"
-HADOOP_CMDE="${HADOOP_HOME}/bin/hadoop "
-
-while [ 1 ]
- do
-  now=`date +%s`
-  strDate=`date +%m/%d/%y%n`
-  srcHourly="/chukwa/postprocess/srcDaily$now/"
-
-  echo "Running $strDate $now" >> "${CHUKWA_LOG_DIR}/daily.log"
-
-  echo "srcHourly: $srcHourly " >> "${CHUKWA_LOG_DIR}/daily.log"
-
-  $HADOOP_CMDE dfs -mkdir $srcHourly/hourly
-  echo "done with mkdir" >> "${CHUKWA_LOG_DIR}/daily.log"
- 
-  $HADOOP_CMDE dfs -mv "/chukwa/archives/hourly/*.arc" ${srcHourly}/hourly/
-  echo "done with mv archives" >> "${CHUKWA_LOG_DIR}/daily.log"
- 
-  # Build the archive
-  $HADOOP_CMDE jar ${CHUKWA_CORE} org.apache.hadoop.chukwa.extraction.archive.ChuckwaArchiveBuilder Daily $srcHourly/hourly $srcHourly/daily
-  echo "done with chuckwaArchiveBuilder" >> "${CHUKWA_LOG_DIR}/daily.log"
-  
-   ## Hourly Archive available call all processors
-   ##############  ############## 
-  
-   ##############  ############## 
-  
-  
-  ############## MERGE or MOVE ##############
-  
-  ############## MERGE or MOVE ##############
-  
-  
-  now=`date +%s`
-  strDate=`date +%m/%d/%y%n`
-  echo "Stopping ${strDate} ${now}" >> "${CHUKWA_LOG_DIR}/daily.log"
-
-  sleep 36000
-done

+ 0 - 64
src/contrib/chukwa/bin/buildHourlyArchive.sh

@@ -1,64 +0,0 @@
-#!/bin/sh
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-pid=$$
-
-bin=`dirname "$0"`
-bin=`cd "$bin"; pwd`
-. "$bin"/chukwa-config.sh
-
-echo "${pid}" > "$CHUKWA_HOME/var/run/buildDailyArchive.pid"
-
-HADOOP_CONF_DIR="${HADOOP_HOME}/conf/"
-HADOOP_CMDE="${HADOOP_HOME}/bin/hadoop "
-
-while [ 1 ]
- do
-  now=`date +%s`
-  strDate=`date +%m/%d/%y%n`
-  srcHourly="/chukwa/postprocess/srcHourly$now/"
-
-  echo "Running $strDate $now" >> "${CHUKWA_LOG_DIR}/hourly.log"
-
-  echo "srcHourly: $srcHourly " >> "${CHUKWA_LOG_DIR}/hourly.log"
-
-  $HADOOP_CMDE dfs -mkdir $srcHourly/raw
-  echo "done with mkdir" >> "${CHUKWA_LOG_DIR}/hourly.log"
- 
-  $HADOOP_CMDE dfs -mv "/chukwa/archives/raw/*.arc" ${srcHourly}/raw/
-  echo "done with mv archives" >> "${CHUKWA_LOG_DIR}/hourly.log"
- 
-  # Build the archive
-  $HADOOP_CMDE jar ${CHUKWA_CORE} org.apache.hadoop.chukwa.extraction.archive.ChuckwaArchiveBuilder Hourly $srcHourly/arcFiles $srcHourly/hourly
-  echo "done with chuckwaArchiveBuilder" >> "${CHUKWA_LOG_DIR}/hourly.log"
-  
-   ## Hourly Archive available call all processors
-   ##############  ############## 
-  
-   ##############  ############## 
-  
-  
-  ############## MERGE or MOVE ##############
-  
-  ############## MERGE or MOVE ##############
-  
-  
-  now=`date +%s`
-  strDate=`date +%m/%d/%y%n`
-  echo "Stopping ${strDate} ${now}" >> "${CHUKWA_LOG_DIR}/hourly.log"
-
-  sleep 36000
-done

+ 0 - 35
src/contrib/chukwa/bin/chukwa

@@ -1,35 +0,0 @@
-#!/usr/bin/env bash
-
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-# The Chukwa command script
-#
-
-bin=`dirname "$0"`
-bin=`cd "$bin"; pwd`
-
-. "$bin"/chukwa-config.sh
-
-# get arguments
-COMMAND=$1
-shift
-
-if [ -f "${CHUKWA_CONF_DIR}/chukwa-env.sh" ]; then
-  . "${CHUKWA_CONF_DIR}/chukwa-env.sh"
-fi
-
-exec "${CHUKWA_HOME}/bin/$@"

+ 0 - 131
src/contrib/chukwa/bin/chukwa-config.sh

@@ -1,131 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# included in all the hadoop scripts with source command
-# should not be executable directly
-# also should not be passed any arguments, since we need original $*
-
-# resolve links - $0 may be a softlink
-
-this="$0"
-while [ -h "$this" ]; do
-  ls=`ls -ld "$this"`
-  link=`expr "$ls" : '.*-> \(.*\)$'`
-  if expr "$link" : '.*/.*' > /dev/null; then
-    this="$link"
-  else
-    this=`dirname "$this"`/"$link"
-  fi
-done
-
-# convert relative path to absolute path
-bin=`dirname "$this"`
-script=`basename "$this"`
-bin=`cd "$bin"; pwd`
-this="$bin/$script"
-
-
-# the root of the Chukwa installation
-export CHUKWA_HOME=`dirname "$this"`/..
-
-#check to see if the conf dir is given as an optional argument
-if [ $# -gt 1 ]
-then
-    if [ "--config" = "$1" ]
-          then
-              shift
-              confdir=$1
-              shift
-              CHUKWA_CONF_DIR=$confdir
-    fi
-fi
-
-#check to see it is specified whether to use the slaves or the
-# masters file
-if [ $# -gt 1 ]
-then
-    if [ "--hosts" = "$1" ]
-    then
-        shift
-        slavesfile=$1
-        shift
-        export CHUKWA_SLAVES="${CHUKWA_CONF_DIR}/$slavesfile"
-    fi
-fi
-
-#check to see if the conf dir is given as an optional argument
-if [ $# -gt 1 ]
-then
-    if [ "--watchdog" = "$1" ]
-          then
-              shift
-              WATCHDOG="true"
-    fi
-fi
-
-if [ -z ${CHUKWA_LOG_DIR} ]; then
-    export CHUKWA_LOG_DIR="$CHUKWA_HOME/logs"
-fi
-
-if [ -z ${CHUKWA_PID_DIR} ]; then
-    export CHUKWA_PID_DIR="${CHUKWA_HOME}/var/run"
-fi
-
-CHUKWA_VERSION=`cat ${CHUKWA_HOME}/bin/VERSION`
-
-# Allow alternate conf dir location.
-if [ -z "$CHUKWA_CONF_DIR" ]; then
-    CHUKWA_CONF_DIR="${CHUKWA_CONF_DIR:-$CHUKWA_HOME/conf}"
-    export CHUKWA_CONF_DIR=${CHUKWA_HOME}/conf
-fi
-
-if [ -f "${CHUKWA_CONF_DIR}/chukwa-env.sh" ]; then
-  . "${CHUKWA_CONF_DIR}/chukwa-env.sh"
-fi
-
-export DATACONFIG=${CHUKWA_CONF_DIR}/mdl.xml
-COMMON=`ls ${CHUKWA_HOME}/lib/*.jar ${CHUKWA_HOME}/hadoopjars/commons*.jar`
-export COMMON=`echo ${COMMON} | sed 'y/ /:/'`
-export CHUKWA_CORE=${CHUKWA_HOME}/chukwa-core-${CHUKWA_VERSION}.jar
-export CHUKWA_AGENT=${CHUKWA_HOME}/chukwa-agent-${CHUKWA_VERSION}.jar
-export CURRENT_DATE=`date +%Y%m%d%H%M`
-
-if [ -z ${HADOOP_JAR} ]; then
-  if [ -z ${HADOOP_HOME} ]; then
-        export HADOOP_HOME=../../..
-    fi
-    if [ -d ${HADOOP_HOME} ]; then
-        export HADOOP_JAR=`ls ${HADOOP_HOME}/hadoop-*-core.jar`
-        if [ -z ${HADOOP_JAR} ]; then
-            echo "Please make sure hadoop-*-core.jar exists in ${HADOOP_HOME}"
-            exit -1
-        fi
-    else
-        if [ -d ${CHUKWA_HOME}/hadoopjars ]; then
-            echo "WARNING: neither HADOOP_HOME nor HADOOP_JAR is set we we are reverting to defaults in $CHUKWA_HOME/hadoopjars dir"
-            export HADOOP_JAR=`ls ${CHUKWA_HOME}/hadoopjars/hadoop-*-core.jar`
-        else
-            echo "Please make sure hadoop-*-core.jar exists in ${CHUKWA_HOME}/hadoopjars"
-            exit -1
-        fi
-    fi
-fi
-
-if [ -z "$JAVA_HOME" ] ; then
-  echo ERROR! You forgot to set JAVA_HOME in conf/chukwa-env.sh   
-fi
-
-export JPS="ps ax"
-

+ 0 - 191
src/contrib/chukwa/bin/chukwa-daemon.sh

@@ -1,191 +0,0 @@
-#!/usr/bin/env bash
-
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-# Runs a Chukwa command as a daemon.
-#
-# Environment Variables
-#
-#   CHUKWA_CONF_DIR  Alternate conf dir. Default is ${CHUKWA_HOME}/conf.
-#   CHUKWA_LOG_DIR   Where log files are stored.  PWD by default.
-#   CHUKWA_MASTER    host:path where chukwa code should be rsync'd from
-#   CHUKWA_PID_DIR   The pid files are stored. ${CHUKWA_HOME}/var/tmp by default.
-#   CHUKWA_IDENT_STRING   A string representing this instance of chukwa. $USER by default
-#   CHUKWA_NICENESS The scheduling priority for daemons. Defaults to 0.
-##
-
-usage="Usage: chukwa-daemon.sh [--config <conf-dir>] [--hosts hostlistfile] (start|stop) <chukwa-command> <args...>"
-
-# if no args specified, show usage
-if [ $# -le 1 ]; then
-  echo $usage
-  exit 1
-fi
-
-bin=`dirname "$0"`
-bin=`cd "$bin"; pwd`
-
-. "$bin"/chukwa-config.sh
-
-# get arguments
-startStop=$1
-shift
-command=$1
-shift
-
-chukwa_rotate_log ()
-{
-    log=$1;
-    num=5;
-    if [ -n "$2" ]; then
-	num=$2
-    fi
-    if [ -f "$log" ]; then # rotate logs
-	while [ $num -gt 1 ]; do
-	    prev=`expr $num - 1`
-	    [ -f "$log.$prev" ] && mv "$log.$prev" "$log.$num"
-	    num=$prev
-	done
-	mv "$log" "$log.$num";
-    fi
-}
-
-if [ -f "${CHUKWA_CONF_DIR}/chukwa-env.sh" ]; then
-  . "${CHUKWA_CONF_DIR}/chukwa-env.sh"
-fi
-
-# get log directory
-if [ "$CHUKWA_LOG_DIR" = "" ]; then
-  export CHUKWA_LOG_DIR="$CHUKWA_HOME/logs"
-fi
-mkdir -p "$CHUKWA_LOG_DIR"
-
-if [ "$CHUKWA_PID_DIR" = "" ]; then
-  CHUKWA_PID_DIR=$CHUKWA_HOME/var/run
-fi
-
-if [ "$CHUKWA_IDENT_STRING" = "" ]; then
-  export CHUKWA_IDENT_STRING="$USER"
-fi
-
-# some variables
-export CHUKWA_LOGFILE=chukwa-$CHUKWA_IDENT_STRING-$command-$HOSTNAME.log
-export CHUKWA_ROOT_LOGGER="INFO,DRFA"
-log=$CHUKWA_LOG_DIR/chukwa-$CHUKWA_IDENT_STRING-$command-$HOSTNAME.out
-pid=$CHUKWA_PID_DIR/chukwa-$CHUKWA_IDENT_STRING-$command.pid
-
-# Set default scheduling priority
-if [ "$CHUKWA_NICENESS" = "" ]; then
-    export CHUKWA_NICENESS=0
-fi
-
-case $startStop in
-
-  (start)
-    MAIL=`cat ${CHUKWA_HOME}/conf/alert.conf`
-
-    RANDOM=`date '+%s'`
-    PARTROL_HOUR=$[($RANDOM % 24)]
-    if [ ${PARTROL_HOUR} -gt 12 ]; then
-        PARTROL_HOUR2=$[${PARTROL_HOUR}-12]
-    else 
-        PARTROL_HOUR2=$[${PARTROL_HOUR}+12]
-    fi
-    if [ "${WATCHDOG}" != "" ]; then
-        mkdir -p ${CHUKWA_HOME}/var/tmp >&/dev/null
-        crontab -l > ${CHUKWA_HOME}/var/tmp/cron.${CURRENT_DATE}
-        crontest=$?
-
-        if [ "X${crontest}" != "X0" ]; then
-          echo "MAILTO=${MAIL}" > ${CHUKWA_HOME}/var/tmp/cron.${CURRENT_DATE}
-        else
-          grep -v "${CHUKWA_HOME}/bin/watchdog.sh" ${CHUKWA_HOME}/var/tmp/cron.${CURRENT_DATE} | grep -v MAILTO | grep -v "cat ${CHUKWA_HOME}/var/run/watchdog.out" | grep -v ${CHUKWA_HOME}/tools/expire.sh > ${CHUKWA_HOME}/var/tmp/cron.${CURRENT_DATE}.2
-          echo "MAILTO=${MAIL}" > ${CHUKWA_HOME}/var/tmp/cron.${CURRENT_DATE}
-          cat ${CHUKWA_HOME}/var/tmp/cron.${CURRENT_DATE}.2 >> ${CHUKWA_HOME}/var/tmp/cron.${CURRENT_DATE}
-          rm -f ${CHUKWA_HOME}/var/tmp/cron.${CURRENT_DATE}.2
-        fi
-        cat >> ${CHUKWA_HOME}/var/tmp/cron.${CURRENT_DATE} << CRON
-*/5 * * * * ${CHUKWA_HOME}/bin/watchdog.sh > ${CHUKWA_HOME}/var/run/watchdog.out
-1 ${PARTROL_HOUR},${PARTROL_HOUR2} * * * /bin/bash -c "cat ${CHUKWA_HOME}/var/run/watchdog.out; cat /dev/null > ${CHUKWA_HOME}/var/run/watchdog.out"
-15 3 * * * ${CHUKWA_HOME}/tools/expire.sh 10 ${CHUKWA_LOG_DIR} nowait
-CRON
-
-        # save crontab
-        echo -n "Registering watchdog.."
-        mkdir -p ${CHUKWA_HOME}/var/tmp >&/dev/null
-        crontab ${CHUKWA_HOME}/var/tmp/cron.${CURRENT_DATE} > /dev/null 2>&1
-        rm -f ${CHUKWA_HOME}/var/tmp/cron.${CURRENT_DATE}
-        echo "done"
-    fi
-
-    mkdir -p "$CHUKWA_PID_DIR"
-
-    if [ -f $pid ]; then
-      if kill -0 `cat $pid` > /dev/null 2>&1; then
-        echo $command running as process `cat $pid`.  Stop it first.
-        exit 1
-      fi
-    fi
-
-    if [ "$CHUKWA_MASTER" != "" ]; then
-      echo rsync from $CHUKWA_MASTER
-      rsync -a -e ssh --delete --exclude=.svn $CHUKWA_MASTER/ "$CHUKWA_HOME"
-    fi
-
-    chukwa_rotate_log $log
-    echo starting $command, logging to $log
-    cd "$CHUKWA_HOME"
-    nohup nice -n $CHUKWA_NICENESS "$CHUKWA_HOME"/bin/chukwa -config $command "$@" > "$log" 2>&1 < /dev/null &
-    echo $! > $pid
-    sleep 1; head "$log"
-    ;;
-          
-  (stop)
-
-    if [ "${WATCHDOG}" != "" ]; then
-        # remove watchdog
-        crontab -l | grep -v ${CHUKWA_HOME}/bin/watchdog.sh | grep -v ${CHUKWA_HOME}/var/run/watchdog.out | grep -v ${CHUKWA_HOME}/tools/expire.sh > ${CHUKWA_HOME}/var/tmp/cron.${CURRENT_DATE}
-        crontab ${CHUKWA_HOME}/var/tmp/cron.${CURRENT_DATE}
-        rm -f ${CHUKWA_HOME}/var/tmp/cron.${CURRENT_DATE}
-    fi
-
-    if [ -f $CHUKWA_HOME/bin/$command ]; then
-      $CHUKWA_HOME/bin/$command stop
-      rm -f $pid
-    else
-      if [ -f $pid ]; then
-        if kill -0 `cat $pid` > /dev/null 2>&1; then
-          echo stopping $command
-          kill `cat $pid`
-          rm -f $pid
-        else
-          echo no $command to stop
-        fi
-      else
-        echo no $command to stop
-      fi
-    fi
-    ;;
-
-  (*)
-    echo $usage
-    exit 1
-    ;;
-
-esac
-
-

+ 0 - 38
src/contrib/chukwa/bin/chukwa-daemons.sh

@@ -1,38 +0,0 @@
-#!/usr/bin/env bash
-
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-# Run a Hadoop command on all slave hosts.
-
-usage="Usage: chukwa-daemons.sh [--config confdir] [--hosts hostlistfile] [start|stop] command args..."
-
-# if no args specified, show usage
-if [ $# -le 1 ]; then
-  echo $usage
-  exit 1
-fi
-
-bin=`dirname "$0"`
-bin=`cd "$bin"; pwd`
-
-. $bin/chukwa-config.sh
-
-if [ ${WATCHDOG}!="" ]; then
-  exec "$bin/slaves.sh" --config $CHUKWA_CONF_DIR cd "$CHUKWA_HOME" \; "$bin/chukwa-daemon.sh" --config $CHUKWA_CONF_DIR --watchdog "$@"
-else
-  exec "$bin/slaves.sh" --config $CHUKWA_CONF_DIR cd "$CHUKWA_HOME" \; "$bin/chukwa-daemon.sh" --config $CHUKWA_CONF_DIR "$@"
-fi

+ 0 - 32
src/contrib/chukwa/bin/dailyRolling.sh

@@ -1,32 +0,0 @@
-#!/bin/sh
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-pid=$$
-
-bin=`dirname "$0"`
-bin=`cd "$bin"; pwd`
-. "$bin"/chukwa-config.sh
-
-HADOOP_CONF_DIR="${HADOOP_HOME}/conf/"
-HADOOP_CMDE="${HADOOP_HOME}/bin/hadoop "
-
-  $HADOOP_CMDE jar ${CHUKWA_CORE} org.apache.hadoop.chukwa.extraction.demux.DailyChukwaRecordRolling rollInSequence true deleteRawdata true
-
-  previousDay=`date --date="2 day ago" +%Y%m%d`
-  #previousDay=`date -v -2d +%Y%m%d`
-  echo "deleting /chukwa/postprocess/srcSink${previousDay}_*"
-  $HADOOP_CMDE dfs -rmr "/chukwa/postprocess/srcSink${previousDay}_*"
-

+ 0 - 62
src/contrib/chukwa/bin/dbAdmin.sh

@@ -1,62 +0,0 @@
-#!/bin/sh
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-pid=$$
-
-bin=`dirname "$0"`
-bin=`cd "$bin"; pwd`
-
-. "$bin"/chukwa-config.sh
-
-if [ "$CHUKWA_IDENT_STRING" = "" ]; then
-  export CHUKWA_IDENT_STRING="$USER"
-fi
-
-trap 'rm -f $CHUKWA_HOME/var/run/chukwa-$CHUKWA_IDENT_STRING-dbAdmin.sh.pid ${CHUKWA_HOME}/var/run/dbAdmin.pid; exit 0' 1 2 15
-EXP_DATE=`date +%Y-%m-%d`
-
-JVM_OPTS="-DAPP=dbAdmin -Dlog4j.configuration=chukwa-log4j.properties -DCHUKWA_HOME=${CHUKWA_HOME} -DCHUKWA_CONF_DIR=${CHUKWA_CONF_DIR} -DCHUKWA_LOG_DIR=${CHUKWA_LOG_DIR} -DDATACONFIG=${CHUKWA_CONF_DIR}/mdl.xml -classpath ${CLASSPATH}:${CHUKWA_CORE}:${COMMON}:${HADOOP_JAR}:${CHUKWA_CONF_DIR}"
-
-echo "${pid}" > "${CHUKWA_HOME}/var/run/dbAdmin.pid"
-while [ 1 ]
-  do
-    start=`date +%s`
-    cat ${CHUKWA_CONF_DIR}/jdbc.conf | \
-    while read LINE; do
-        CLUSTER=`echo ${LINE} | cut -f 1 -d'='`
-        ${JAVA_HOME}/bin/java -DCLUSTER=${CLUSTER} ${JVM_OPTS} org.apache.hadoop.chukwa.database.TableCreator ${EXP_DATE} 7 &
-        ${JAVA_HOME}/bin/java -DCLUSTER=${CLUSTER} ${JVM_OPTS} org.apache.hadoop.chukwa.database.TableCreator ${EXP_DATE} 30 &
-        ${JAVA_HOME}/bin/java -DCLUSTER=${CLUSTER} ${JVM_OPTS} org.apache.hadoop.chukwa.database.TableCreator ${EXP_DATE} 91 &
-        ${JAVA_HOME}/bin/java -DCLUSTER=${CLUSTER} ${JVM_OPTS} org.apache.hadoop.chukwa.database.TableCreator ${EXP_DATE} 365 &
-        ${JAVA_HOME}/bin/java -DCLUSTER=${CLUSTER} ${JVM_OPTS} org.apache.hadoop.chukwa.database.TableCreator ${EXP_DATE} 3650 &
-        ${JAVA_HOME}/bin/java -DCLUSTER=${CLUSTER} ${JVM_OPTS} org.apache.hadoop.chukwa.database.Aggregator &
-        ${JAVA_HOME}/bin/java -DCLUSTER=${CLUSTER} ${JVM_OPTS} org.apache.hadoop.chukwa.database.DataExpiration ${EXP_DATE} 7 &
-        ${JAVA_HOME}/bin/java -DCLUSTER=${CLUSTER} ${JVM_OPTS} org.apache.hadoop.chukwa.database.DataExpiration ${EXP_DATE} 30 &
-        ${JAVA_HOME}/bin/java -DCLUSTER=${CLUSTER} ${JVM_OPTS} org.apache.hadoop.chukwa.database.DataExpiration ${EXP_DATE} 91 &
-        ${JAVA_HOME}/bin/java -DCLUSTER=${CLUSTER} ${JVM_OPTS} org.apache.hadoop.chukwa.database.DataExpiration ${EXP_DATE} 365 &
-        ${JAVA_HOME}/bin/java -DCLUSTER=${CLUSTER} ${JVM_OPTS} org.apache.hadoop.chukwa.database.DataExpiration ${EXP_DATE} 3650 &
-    done
-    end=`date +%s`
-    duration=$(( $end - $start ))
-    if [ $duration -lt 300 ]; then
-        sleep=$(( 300 - $duration ))
-        SLEEP_COUNTER=`expr $sleep / 5`
-        while [ $SLEEP_COUNTER -gt 1 ]; do
-            sleep 5
-            SLEEP_COUNTER=`expr $SLEEP_COUNTER - 1`
-        done
-    fi
-done

+ 0 - 25
src/contrib/chukwa/bin/dbLoader.sh

@@ -1,25 +0,0 @@
-#!/bin/sh
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-pid=$$
-
-bin=`dirname "$0"`
-bin=`cd "$bin"; pwd`
-
-. "$bin"/chukwa-config.sh
-
-echo "${pid}" > "$CHUKWA_HOME/var/run/dbLoader.pid"
-${JAVA_HOME}/bin/java -DDATACONFIG=${CHUKWA_CONF_DIR}/mdl.xml -classpath ${CLASSPATH}:${CHUKWA_CORE}:${COMMON}:${HADOOP_JAR}:${CHUKWA_CONF_DIR} org.apache.hadoop.chukwa.extraction.database.MetricDataLoader $1

+ 0 - 32
src/contrib/chukwa/bin/dbSetup.sh

@@ -1,32 +0,0 @@
-#!/bin/sh
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-pid=$$
-
-bin=`dirname "$0"`
-bin=`cd "$bin"; pwd`
-
-. "$bin"/chukwa-config.sh
-
-EXP_DATE=`date +%Y-%m-%d`
-echo -n "SETUP Database partition..."
-echo "${pid}" > "$CHUKWA_HOME/var/run/dbSetup.pid"
-${JAVA_HOME}/bin/java -DCLUSTER=$1 -DDATACONFIG=${CHUKWA_CONF_DIR}/mdl.xml -classpath ${CLASSPATH}:${CHUKWA_CORE}:${COMMON}:${HADOOP_JAR}:${CHUKWA_CONF_DIR} org.apache.hadoop.chukwa.database.TableCreator ${EXP_DATE} 7 #>/dev/null 2>&1
-${JAVA_HOME}/bin/java -DCLUSTER=$1 -DDATACONFIG=${CHUKWA_CONF_DIR}/mdl.xml -classpath ${CLASSPATH}:${CHUKWA_CORE}:${COMMON}:${HADOOP_JAR}:${CHUKWA_CONF_DIR} org.apache.hadoop.chukwa.database.TableCreator ${EXP_DATE} 30 >/dev/null 2>&1
-${JAVA_HOME}/bin/java -DCLUSTER=$1 -DDATACONFIG=${CHUKWA_CONF_DIR}/mdl.xml -classpath ${CLASSPATH}:${CHUKWA_CORE}:${COMMON}:${HADOOP_JAR}:${CHUKWA_CONF_DIR} org.apache.hadoop.chukwa.database.TableCreator ${EXP_DATE} 91 >/dev/null 2>&1
-${JAVA_HOME}/bin/java -DCLUSTER=$1 -DDATACONFIG=${CHUKWA_CONF_DIR}/mdl.xml -classpath ${CLASSPATH}:${CHUKWA_CORE}:${COMMON}:${HADOOP_JAR}:${CHUKWA_CONF_DIR} org.apache.hadoop.chukwa.database.TableCreator ${EXP_DATE} 365 >/dev/null 2>&1
-echo "done"
-rm -f "$CHUKWA_HOME/var/run/dbSetup.pid"

+ 0 - 24
src/contrib/chukwa/bin/dumpArchive.sh

@@ -1,24 +0,0 @@
-#!/bin/sh
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-pid=$$
-
-bin=`dirname "$0"`
-bin=`cd "$bin"; pwd`
-
-. "$bin"/chukwa-config.sh
-
-${JAVA_HOME}/bin/java -DCHUKWA_CONF_DIR=${CHUKWA_CONF_DIR} -classpath ${CLASSPATH}:${CHUKWA_CORE}:${COMMON}:${HADOOP_JAR}:${CHUKWA_CONF_DIR} org.apache.hadoop.chukwa.util.DumpArchive $1

+ 0 - 24
src/contrib/chukwa/bin/dumpDataType.sh

@@ -1,24 +0,0 @@
-#!/bin/sh
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-pid=$$
-
-bin=`dirname "$0"`
-bin=`cd "$bin"; pwd`
-
-. "$bin"/chukwa-config.sh
-
-${JAVA_HOME}/bin/java -DCHUKWA_CONF_DIR=${CHUKWA_CONF_DIR} -classpath ${CLASSPATH}:${CHUKWA_CORE}:${COMMON}:${HADOOP_JAR}:${CHUKWA_CONF_DIR} org.apache.hadoop.chukwa.util.DumpDataType $@

+ 0 - 24
src/contrib/chukwa/bin/dumpRecord.sh

@@ -1,24 +0,0 @@
-#!/bin/sh
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-pid=$$
-
-bin=`dirname "$0"`
-bin=`cd "$bin"; pwd`
-
-. "$bin"/chukwa-config.sh
-
-${JAVA_HOME}/bin/java -DCHUKWA_CONF_DIR=${CHUKWA_CONF_DIR} -classpath ${CLASSPATH}:${CHUKWA_CORE}:${COMMON}:${HADOOP_JAR}:${CHUKWA_CONF_DIR} org.apache.hadoop.chukwa.util.DumpRecord $1

+ 0 - 28
src/contrib/chukwa/bin/hourlyRolling.sh

@@ -1,28 +0,0 @@
-#!/bin/sh
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-pid=$$
-
-bin=`dirname "$0"`
-bin=`cd "$bin"; pwd`
-. "$bin"/chukwa-config.sh
-
-HADOOP_CONF_DIR="${HADOOP_HOME}/conf/"
-HADOOP_CMDE="${HADOOP_HOME}/bin/hadoop "
-
-  $HADOOP_CMDE jar ${CHUKWA_CORE} org.apache.hadoop.chukwa.extraction.demux.HourlyChukwaRecordRolling rollInSequence true deleteRawdata true
-
-

+ 0 - 37
src/contrib/chukwa/bin/jettyCollector.sh

@@ -1,37 +0,0 @@
-#!/bin/sh
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-pid=$$
-
-bin=`dirname "$0"`
-bin=`cd "$bin"; pwd`
-
-. "$bin"/chukwa-config.sh
-
-trap 'stop; exit 0' 1 2 15
-
-function stop {
-  echo -n "Shutting down Collector..."
-  ${JPS} | grep CollectorStub | grep -v grep | grep -o '[^ ].*'| cut -f 1 -d" " | xargs kill -TERM >&/dev/null
-  echo "done"
-  exit 0
-}
-
-if [ "X$1" = "Xstop" ]; then
-  stop
-fi
-
-${JAVA_HOME}/bin/java -DAPP=collector -Dlog4j.configuration=chukwa-log4j.properties -DCHUKWA_HOME=${CHUKWA_HOME} -DCHUKWA_CONF_DIR=${CHUKWA_CONF_DIR} -DCHUKWA_LOG_DIR=${CHUKWA_LOG_DIR} -classpath ${CLASSPATH}:${CHUKWA_CORE}:${COMMON}:${HADOOP_JAR}:${CHUKWA_CONF_DIR} org.apache.hadoop.chukwa.datacollection.collector.CollectorStub $@

+ 0 - 59
src/contrib/chukwa/bin/jobhisttailstarter.sh

@@ -1,59 +0,0 @@
-#!/bin/bash
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# A script to tell chukwa to monitor job history files.
-# Rather simpleminded -- gets a list of what's being monitored,
-# and tells chukwa to watch everything in job hist that it isn't already scanning.
-#   Relies on having netcat. Also, control socket portno is currently hardcoded,
-#   as are hostname and adaptor name.
-
-if [ $# -lt 1 ]; then
-         echo 1>&2 Usage: $0 '<path to job history files>'
-         exit 127
-    fi
-
-
-JOB_HIST=`(cd $1; pwd)`  #returns an absolute path
-echo "assuming job history logs live in $JOB_HIST"
-JOBHISTFILES=/tmp/jobhistfiles
-TAILEDFILES=/tmp/tailedhists
-
-#Step 1 -- get a list of currently watched files
-(nc localhost 9093 | grep -o "[^/]*$" | grep -o '^[^ ]*' | sort > $TAILEDFILES)  <<HERE
-list
-close
-HERE
-
-#step 2 -- get the list of history files
-ls $JOB_HIST | grep -v '\.xml' | sort  > $JOBHISTFILES
-#step 3 -- start watching each new history file
-#find files that aren't being watched, and are in job history dir
-#NEWHISTFILES=`cat $JOBHISTFILES`
-#NEWHISTFILES=`sort /tmp/both | uniq -u > /tmp/one`| uniq -d - $JOBHISTFILES`
-
-cat $JOBHISTFILES $TAILEDFILES | sort | uniq -u > /tmp/either  
-#either not tailed, or not a history file
-NEWHISTFILES=`cat /tmp/either $JOBHISTFILES | sort | uniq -d`
-#better be a job history file -- hence, not being tailed
-
-for job in $NEWHISTFILES ; do
-	#new jobs are rare, safe to create socket per job hist file
-nc localhost 9093 <<HERE
-add LineFileTailUTF8 $JOB_HIST$job 0
-close
-HERE
-  echo "told Chukwa agent to start watching $job"
-done

+ 0 - 2
src/contrib/chukwa/bin/netstat.sh

@@ -1,2 +0,0 @@
-#!/bin/bash
-netstat -a | grep ESTABLISH | grep -v '        0      0'

+ 0 - 46
src/contrib/chukwa/bin/nodeActivityDataLoader.sh

@@ -1,46 +0,0 @@
-#!/bin/bash
-
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-bin=`dirname "$0"`
-bin=`cd "$bin"; pwd`
-
-. "$bin"/chukwa-config.sh
-
-if [ "X$1" = "Xstop" ]; then
-  echo -n "Shutting down Node Activity Data Loader..."
-  if [ -f ${CHUKWA_HOME}/var/run/PbsNodes-data-loader.pid ]; then
-    kill -TERM `cat ${CHUKWA_HOME}/var/run/PbsNodes-data-loader.pid`
-  fi
-  echo "done"
-  exit 0
-fi
-
-EXISTS=0
-pidFile="${CHUKWA_HOME}/var/run/PbsNodes-data-loader.pid"
-if [ -f $pidFile ]; then
-  pid=`head ${pidFile}`
-  ChildPIDRunningStatus=`${JPS} | grep ${pid} | grep Exec | grep -v grep | wc -l`
-  if [ $ChildPIDRunningStatus -ge 1 ]; then
-    EXISTS=1
-  fi
-fi
-
-if [ ${EXISTS} -lt 1 ]; then
-    ${JAVA_HOME}/bin/java -DPERIOD=600 -DCHUKWA_HOME=${CHUKWA_HOME} -DCHUKWA_CONF_DIR=${CHUKWA_CONF_DIR} -DCHUKWA_LOG_DIR=${CHUKWA_LOG_DIR} -DRECORD_TYPE=PbsNodes -Dlog4j.configuration=system-data-loader.properties -classpath ${CLASSPATH}:${CHUKWA_CORE}:${HADOOP_JAR}:${COMMON}:${TOOLS}:${CHUKWA_CONF_DIR} org.apache.hadoop.chukwa.inputtools.plugin.metrics.Exec "${nodeActivityCmde}" &
-fi
-
-

+ 0 - 155
src/contrib/chukwa/bin/processSinkFiles.sh

@@ -1,155 +0,0 @@
-#!/bin/sh
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-pid=$$
-
-bin=`dirname "$0"`
-bin=`cd "$bin"; pwd`
-. "$bin"/chukwa-config.sh
-
-if [ "$CHUKWA_IDENT_STRING" = "" ]; then
-  export CHUKWA_IDENT_STRING="$USER"
-fi
-
-trap 'remove_cron;rm -f $CHUKWA_HOME/var/run/chukwa-$CHUKWA_IDENT_STRING-processSinkFiles.sh.pid ${CHUKWA_HOME}/var/run/ProcessSinkFiles.pid; exit 0' 1 2 15
-echo "${pid}" > "$CHUKWA_HOME/var/run/ProcessSinkFiles.pid"
-
-HADOOP_CMDE="${HADOOP_HOME}/bin/hadoop "
-
-function remove_cron {
-    mkdir -p ${CHUKWA_HOME}/var/tmp >&/dev/null
-    crontab -l | grep -v ${CHUKWA_HOME}/bin/hourlyRolling.sh > ${CHUKWA_HOME}/var/tmp/cron.${CURRENT_DATE}
-    cat ${CHUKWA_HOME}/var/tmp/cron.${CURRENT_DATE} | grep -v ${CHUKWA_HOME}/bin/dailyRolling.sh > ${CHUKWA_HOME}/var/tmp/cron.${CURRENT_DATE}.2
-    crontab ${CHUKWA_HOME}/var/tmp/cron.${CURRENT_DATE}.2
-    rm -f ${CHUKWA_HOME}/var/tmp/cron.${CURRENT_DATE}
-    rm -f ${CHUKWA_HOME}/var/tmp/cron.${CURRENT_DATE}.2
-}
-
-function add_cron {
-    mkdir -p ${CHUKWA_HOME}/var/tmp >&/dev/null
-    crontab -l > ${CHUKWA_HOME}/var/tmp/cron.${CURRENT_DATE}
-    crontest=$?
-
-    if [ "X${crontest}" != "X0" ]; then
-      cat > ${CHUKWA_HOME}/var/tmp/cron.${CURRENT_DATE} << CRON
-16 * * * * ${CHUKWA_HOME}/bin/hourlyRolling.sh >& ${CHUKWA_HOME}/logs/hourly.log
-30 1 * * * ${CHUKWA_HOME}/bin/dailyRolling.sh >& ${CHUKWA_HOME}/logs/dailyRolling.log
-CRON
-    else
-      grep -v "${CHUKWA_HOME}/bin/hourlyRolling.sh" ${CHUKWA_HOME}/var/tmp/cron.${CURRENT_DATE}  | grep -v "${CHUKWA_HOME}/bin/dailyRolling.sh" > ${CHUKWA_HOME}/var/tmp/cron.${CURRENT_DATE}.2
-      mv ${CHUKWA_HOME}/var/tmp/cron.${CURRENT_DATE}.2 ${CHUKWA_HOME}/var/tmp/cron.${CURRENT_DATE}
-      cat >> ${CHUKWA_HOME}/var/tmp/cron.${CURRENT_DATE} << CRON
-16 * * * * ${CHUKWA_HOME}/bin/hourlyRolling.sh >& ${CHUKWA_HOME}/logs/hourly.log
-30 1 * * * ${CHUKWA_HOME}/bin/dailyRolling.sh >& ${CHUKWA_HOME}/logs/dailyRolling.log
-CRON
-    fi
-
-    # save crontab
-    echo -n "Registering cron jobs.."
-    crontab ${CHUKWA_HOME}/var/tmp/cron.${CURRENT_DATE} > /dev/null 2>&1
-    rm -f ${CHUKWA_HOME}/var/tmp/cron.${CURRENT_DATE}
-    echo "done"
-}
-
-if [ "X$1" = "Xstop" ]; then
-  echo -n "Shutting down processSinkFiles.sh..."
-  kill -TERM `cat ${CHUKWA_HOME}/var/run/ProcessSinkFiles.pid`
-  echo "done"
-  exit 0
-fi
-
-if [ "X$1" = "Xwatchdog" ]; then
-  add_cron
-fi
-
-while [ 1 ]
- do
-  debugDate=`date `
-  startTime=`date +%s`
-  now=`date +%Y%m%d_%H_%M%S`
-  strDate=`date +%Y%m%d_%H_%M%S`
-  srcDoneHdfsDir="/chukwa/postprocess/srcSink$now/"
-  
-  
-  destArchiveDir=`date +%Y%m%d/%H/%M%S`
-
-  echo "Running $strDate $now" >> "${CHUKWA_LOG_DIR}/mr.log"
-
-  echo "srcDoneHdfsDir: $srcDoneHdfsDir " >> "${CHUKWA_LOG_DIR}/mr.log"
-
-  $HADOOP_CMDE dfs -mkdir $srcDoneHdfsDir/doneFile
-  echo "done with mkdir" >> "${CHUKWA_LOG_DIR}/mr.log"
- 
-  $HADOOP_CMDE dfs -mv "/chukwa/logs/*/*.done" ${srcDoneHdfsDir}/doneFile
-  endMoveTime=`date +%s`
-  moveDuration=$(( $endMoveTime - $startTime))
-  echo "moveDuration $moveDuration" >> "${CHUKWA_LOG_DIR}/mr.log"
-  debugDate=`date `
-  echo "$debugDate done with mv logs" >> "${CHUKWA_LOG_DIR}/mr.log"
- 
-  # Build the archive
-  $HADOOP_CMDE jar  ${CHUKWA_CORE} org.apache.hadoop.chukwa.extraction.archive.ChukwaArchiveBuilder Stream ${srcDoneHdfsDir}/doneFile /chukwa/archives/raw/${destArchiveDir}
-  endArchiveTime=`date +%s`
-  archiveDuration=$(( $endArchiveTime - $endMoveTime))
-  echo "archiveDuration $archiveDuration" >> "${CHUKWA_LOG_DIR}/mr.log"
-  debugDate=`date `
-  echo "$debugDate done with chuckwaArchiveBuilder" >> "${CHUKWA_LOG_DIR}/mr.log"
-  
-  
-  ## Archive available call all processors
-  
-  
-  $HADOOP_CMDE jar  ${CHUKWA_CORE} org.apache.hadoop.chukwa.extraction.demux.Demux -Dmapred.compress.map.output=true -Dmapred.map.output.compression.codec=org.apache.hadoop.io.compress.LzoCodec -Dmapred.output.compress=true -Dmapred.output.compression.type=BLOCK -r 4 /chukwa/archives/raw/${destArchiveDir} ${srcDoneHdfsDir}/demux
-  endDemuxTime=`date +%s`
-  demuxDuration=$(( $endDemuxTime - $endArchiveTime))
-  echo "demuxDuration $demuxDuration" >> "${CHUKWA_LOG_DIR}/mr.log"
-  debugDate=`date `
-  echo "$debugDate done with demux job" >> "${CHUKWA_LOG_DIR}/mr.log"
-   
-  ${JAVA_HOME}/bin/java -DCHUKWA_HOME=${CHUKWA_HOME} -DCHUKWA_CONF_DIR=${CHUKWA_CONF_DIR} -DCHUKWA_LOG_DIR=${CHUKWA_LOG_DIR} -Dlog4j.configuration=log4j.properties -classpath ${CLASSPATH}:${CHUKWA_CORE}:${HADOOP_JAR}:${COMMON}:${TOOLS}:${CHUKWA_HOME}/conf org.apache.hadoop.chukwa.extraction.database.DatabaseLoader "${srcDoneHdfsDir}/demux" SystemMetrics Df Hadoop_dfs Hadoop_jvm Hadoop_mapred Hadoop_rpc MSSRGraph MRJobCounters NodeActivity HodJob HodMachine Hadoop_dfs_FSDirectory Hadoop_dfs_FSNamesystem Hadoop_dfs_datanode Hadoop_dfs_namenode Hadoop_jvm_metrics Hadoop_mapred_job Hadoop_mapred_jobtracker Hadoop_mapred_shuffleOutput Hadoop_mapred_tasktracker Hadoop_rpc_metrics
-  endDbLoaderTime=`date +%s`
-  dbLoaderDuration=$(( $endDbLoaderTime - $endDemuxTime))
-  echo "dbLoaderDuration $dbLoaderDuration" >> "${CHUKWA_LOG_DIR}/mr.log"
-  debugDate=`date `
-  echo "$debugDate done with dbLoader job" >> "${CHUKWA_LOG_DIR}/mr.log"
-   
-  $HADOOP_CMDE jar ${CHUKWA_CORE} org.apache.hadoop.chukwa.extraction.demux.MoveToRepository ${srcDoneHdfsDir}/demux ${chuwaRecordsRepository}
-  endMoveToRepoTime=`date +%s`
-  moveToRepoDuration=$(( $endMoveToRepoTime - $endDbLoaderTime))
-  echo "moveToRepoDuration $moveToRepoDuration" >> "${CHUKWA_LOG_DIR}/mr.log"
-  debugDate=`date `
-  echo "$debugDate done with MoveToRepository" >> "${CHUKWA_LOG_DIR}/mr.log"
-  
-  now=`date +%s`
-  strDate=`date +%m/%d/%y%n`
-  debugDate=`date `
-  echo "$debugDate Stopping ${strDate} ${now}" >> "${CHUKWA_LOG_DIR}/mr.log"
-  
-  endTime=`date +%s`
-  duration=$(( $endTime - $startTime))
-  echo "Duration: $duration s" >> "${CHUKWA_LOG_DIR}/mr.log"
-  
-  if [ $duration -lt 300 ]; then
-   sleepTime=$(( 300 - $duration)) 
-   echo "Sleep: $sleepTime s" >> "${CHUKWA_LOG_DIR}/mr.log"
-   SLEEP_COUNTER=`expr $sleepTime / 5`
-   while [ $SLEEP_COUNTER -gt 1 ]; do
-       sleep 5
-       SLEEP_COUNTER=`expr $SLEEP_COUNTER - 1`
-   done
-  fi
-done
-
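The add_cron/remove_cron functions above both follow the same idempotent crontab-editing pattern: dump the current crontab, strip this script's own entries, optionally append fresh ones, and reinstall. A hedged sketch of that pattern in isolation (the MARKER value and temp-file handling are illustrative; the cron lines are copied from the script above):

# Sketch only: the dump / filter / append / reinstall crontab pattern.
MARKER="$CHUKWA_HOME/bin"                              # any string unique to our entries
TMP=$(mktemp)
crontab -l 2>/dev/null | grep -v "$MARKER" > "$TMP"    # keep everyone else's jobs
cat >> "$TMP" << CRON                                  # omit this block to just remove ours
16 * * * * $CHUKWA_HOME/bin/hourlyRolling.sh >& $CHUKWA_HOME/logs/hourly.log
30 1 * * * $CHUKWA_HOME/bin/dailyRolling.sh >& $CHUKWA_HOME/logs/dailyRolling.log
CRON
crontab "$TMP" && rm -f "$TMP"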

+ 0 - 0
src/contrib/chukwa/bin/shutdown.sh


+ 0 - 68
src/contrib/chukwa/bin/slaves.sh

@@ -1,68 +0,0 @@
-#!/usr/bin/env bash
-
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-# Run a shell command on all slave hosts.
-#
-# Environment Variables
-#
-#   CHUKWA_SLAVES    File naming remote hosts.
-#     Default is ${CHUKWA_CONF_DIR}/chukwa-agents.
-#   CHUKWA_CONF_DIR  Alternate conf dir. Default is ${CHUKWA_HOME}/conf.
-#   CHUKWA_SLAVE_SLEEP Seconds to sleep between spawning remote commands.
-#   CHUKWA_SSH_OPTS Options passed to ssh when running remote commands.
-##
-
-usage="Usage: slaves.sh [--config confdir] command..."
-
-# if no args specified, show usage
-if [ $# -le 0 ]; then
-  echo $usage
-  exit 1
-fi
-
-bin=`dirname "$0"`
-bin=`cd "$bin"; pwd`
-
-. "$bin"/chukwa-config.sh
-
-# If the slaves file is specified on the command line,
-# then it takes precedence over the definition in
-# chukwa-env.sh. Save it here.
-HOSTLIST=$CHUKWA_SLAVES
-
-if [ -f "${CHUKWA_CONF_DIR}/chukwa-env.sh" ]; then
-  . "${CHUKWA_CONF_DIR}/chukwa-env.sh"
-fi
-
-if [ "$HOSTLIST" = "" ]; then
-  if [ "$CHUKWA_SLAVES" = "" ]; then
-    export HOSTLIST="${CHUKWA_CONF_DIR}/chukwa-agents"
-  else
-    export HOSTLIST="${CHUKWA_SLAVES}"
-  fi
-fi
-
-for slave in `cat "$HOSTLIST"`; do
- ssh $CHUKWA_SSH_OPTS $slave $"${@// /\\ }" \
-   2>&1 | sed "s/^/$slave: /" &
- if [ "$CHUKWA_SLAVE_SLEEP" != "" ]; then
-   sleep $CHUKWA_SLAVE_SLEEP
- fi
-done
-
-wait
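A hedged usage note for the loop above (host names and option values are invented examples): each remote command is spawned in the background, its output is tagged with the host name by the sed filter, and the final wait holds the script until every ssh returns.

# Example invocation (illustrative values):
#   run "uptime" on every host in conf/chukwa-agents, one ssh spawn per second.
CHUKWA_SLAVE_SLEEP=1 CHUKWA_SSH_OPTS="-o ConnectTimeout=5" bin/slaves.sh uptime
# each line of remote output comes back prefixed "hostname: " by the sed above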

+ 0 - 31
src/contrib/chukwa/bin/start-agents.sh

@@ -1,31 +0,0 @@
-#!/usr/bin/env bash
-
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-# This script is modeled after the parallel script in Hadoop
-# Start chukwa agent daemons on cluster slaves
-# Run this on a collector node.
-
-usage="Usage: start-agent.sh"
-
-bin=`dirname "$0"`
-bin=`cd "$bin"; pwd`
-
-. "$bin"/chukwa-config.sh
-
-# start chukwa agents
-"$bin"/chukwa-daemons.sh --config $CHUKWA_CONF_DIR --hosts slaves start agent.sh

+ 0 - 36
src/contrib/chukwa/bin/start-all.sh

@@ -1,36 +0,0 @@
-#!/usr/bin/env bash
-
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-# Start all chukwa daemons.  Run this on master node.
-
-bin=`dirname "$0"`
-bin=`cd "$bin"; pwd`
-
-. "$bin"/chukwa-config.sh
-
-# start collectors
-"$bin"/start-collectors.sh --config $CHUKWA_CONF_DIR
-
-# start agents
-"$bin"/start-agents.sh --config $CHUKWA_CONF_DIR
-
-# start probes
-"$bin"/start-probes.sh --config $CHUKWA_CONF_DIR
-
-# start data processors
-"$bin"/start-data-processors.sh --config $CHUKWA_CONF_DIR

+ 0 - 31
src/contrib/chukwa/bin/start-collectors.sh

@@ -1,31 +0,0 @@
-#!/usr/bin/env bash
-
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-# Start chukwa collector daemons (jetty collectors)
-# on the hosts listed in the collectors file.
-# Run this on master node.
-
-usage="Usage: start-collectors.sh"
-
-bin=`dirname "$0"`
-bin=`cd "$bin"; pwd`
-
-. "$bin"/chukwa-config.sh
-
-# start jetty collectors
-"$bin"/chukwa-daemons.sh --config $CHUKWA_CONF_DIR --hosts collectors --watchdog start jettyCollector.sh

+ 0 - 39
src/contrib/chukwa/bin/start-data-processors.sh

@@ -1,39 +0,0 @@
-#!/bin/bash
-
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-bin=`dirname "$0"`
-bin=`cd "$bin"; pwd`
-java=$JAVA_HOME/bin/java
-
-. "$bin"/chukwa-config.sh
-if [ ! -d ${CHUKWA_HOME}/opt/apache-tomcat-6.0.16 ]; then
-  if [ -f ${CHUKWA_HOME}/opt/apache-tomcat-6.0.16.tar.gz ]; then
-    tar fxz ${CHUKWA_HOME}/opt/apache-tomcat-6.0.16.tar.gz -C ${CHUKWA_HOME}/opt
-  fi
-fi
-
-if [ ! -f ${CHUKWA_HOME}/opt/apache-tomcat-6.0.16/webapps/hicc-${CHUKWA_VERSION}.war ]; then
-  if [ -f ${CHUKWA_HOME}/hicc-${CHUKWA_VERSION}.war ]; then
-    cp ${CHUKWA_HOME}/hicc-${CHUKWA_VERSION}.war ${CHUKWA_HOME}/opt/apache-tomcat-6.0.16/webapps
-  fi
-fi 
-
-# start data processors
-"$bin"/chukwa-daemon.sh --config $CHUKWA_CONF_DIR --watchdog start processSinkFiles.sh watchdog
-
-# start database admin script
-"$bin"/chukwa-daemon.sh --config $CHUKWA_CONF_DIR start dbAdmin.sh

+ 0 - 39
src/contrib/chukwa/bin/start-probes.sh

@@ -1,39 +0,0 @@
-#!/usr/bin/env bash
-
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-# Start chukwa probe daemons: the system data loaders on the agent hosts,
-# plus the torque and node-activity data loaders when they are configured.
-# Run this on master node.
-
-usage="Usage: start-probes.sh"
-
-bin=`dirname "$0"`
-bin=`cd "$bin"; pwd`
-
-. "$bin"/chukwa-config.sh
-
-# start system data loader daemons
-"$bin"/chukwa-daemons.sh --config $CHUKWA_CONF_DIR start systemDataLoader.sh
-
-# start torque data loader daemons
-if [ "x${TORQUE_HOME}" != "x" ]; then
-  "$bin"/chukwa-daemon.sh --config $CHUKWA_CONF_DIR start torqueDataLoader.sh
-fi
-if [ "x${nodeActivityCmde}" != "x" ]; then
-  "$bin"/chukwa-daemon.sh --config $CHUKWA_CONF_DIR start nodeActivityDataLoader.sh
-fi

+ 0 - 0
src/contrib/chukwa/bin/startup.sh


+ 0 - 26
src/contrib/chukwa/bin/stop-agents.sh

@@ -1,26 +0,0 @@
-#!/usr/bin/env bash
-
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-# Stop agents.  Run this on master node.
-
-bin=`dirname "$0"`
-bin=`cd "$bin"; pwd`
-
-. "$bin"/chukwa-config.sh
-
-"$bin"/chukwa-daemons.sh --config $CHUKWA_CONF_DIR --hosts slaves --watchdog stop agent.sh

+ 0 - 29
src/contrib/chukwa/bin/stop-all.sh

@@ -1,29 +0,0 @@
-#!/usr/bin/env bash
-
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-# Stop all chukwa daemons.  Run this on master node.
-
-bin=`dirname "$0"`
-bin=`cd "$bin"; pwd`
-
-. "$bin"/chukwa-config.sh
-
-"$bin"/stop-probes.sh --config $CHUKWA_CONF_DIR
-"$bin"/stop-data-processors.sh --config $CHUKWA_CONF_DIR
-"$bin"/stop-agents.sh --config $CHUKWA_CONF_DIR
-"$bin"/stop-collectors.sh --config $CHUKWA_CONF_DIR

+ 0 - 26
src/contrib/chukwa/bin/stop-collectors.sh

@@ -1,26 +0,0 @@
-#!/usr/bin/env bash
-
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-# Stop collectors.  Run this on master node.
-
-bin=`dirname "$0"`
-bin=`cd "$bin"; pwd`
-
-. "$bin"/chukwa-config.sh
-
-"$bin"/chukwa-daemons.sh --config $CHUKWA_CONF_DIR --hosts collectors --watchdog stop jettyCollector.sh

+ 0 - 75
src/contrib/chukwa/bin/stop-data-processors.sh

@@ -1,75 +0,0 @@
-#!/bin/bash
-
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-bin=`dirname "$0"`
-bin=`cd "$bin"; pwd`
-java=$JAVA_HOME/bin/java
-
-. "$bin"/chukwa-config.sh
-
-# stop processSinkFiles.sh
-pidFile=$CHUKWA_PID_DIR/ProcessSinkFiles.pid
-if [ -f $pidFile ]; then  
-   echo -n "Shutting down Data Processors.."
-   DP_PID=`head ${pidFile}`
-   kill -TERM ${DP_PID}
-   for i in 1 2 5; do
-       test_pid=`ps ax | grep ${DP_PID} | grep -v grep | grep processSinkFiles.sh | wc -l`
-       if [ $test_pid -ge 1 ]; then
-           sleep $i
-           kill -TERM ${DP_PID}
-       else
-           break
-       fi
-   done
-   test_pid=`ps ax | grep ${DP_PID} | grep -v grep | grep processSinkFiles.sh | wc -l`
-   if [ $test_pid -ge 1 ]; then
-       kill -9 ${DP_PID} &>/dev/null
-   fi
-   rm -f ${pidFile}
-   rm -f $CHUKWA_PID_DIR/chukwa-$CHUKWA_IDENT_STRING-processSinkFiles.sh.pid
-   echo "done"
-else
-  echo " no $pidFile"
-fi
-
-# stop dbAdmin.sh
-pidFile=$CHUKWA_PID_DIR/dbAdmin.pid
-if [ -f $pidFile ]; then  
-   echo -n "Shutting down Database Admin.."
-   DBADMIN_PID=`head ${pidFile}`
-   kill -TERM ${DBADMIN_PID}
-   for i in 1 2 5; do
-       test_pid=`ps ax | grep ${DBADMIN_PID} | grep -v grep | grep dbAdmin.sh | wc -l`
-       if [ $test_pid -ge 1 ]; then
-           sleep $i
-           kill -TERM ${DBADMIN_PID}
-       else
-           break
-       fi
-   done
-   test_pid=`ps ax | grep ${DBADMIN_PID} | grep -v grep | grep dbAdmin.sh | wc -l`
-   if [ $test_pid -ge 1 ]; then
-       kill -9 ${DBADMIN_PID} &>/dev/null
-   fi
-   rm -f ${pidFile}
-   rm -f $CHUKWA_PID_DIR/chukwa-$CHUKWA_IDENT_STRING-dbAdmin.sh.pid
-   echo "done"
-else
-  echo " no $pidFile"
-fi
-
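stop-data-processors.sh repeats the same graceful-then-forceful shutdown twice: send TERM, wait with increasing back-off while the process is still visible in ps, then fall back to kill -9. A hedged sketch of that escalation factored into one helper (the function name is illustrative, not part of the removed code):

# Sketch only: the TERM / back-off / KILL escalation used twice above.
stop_by_pidfile() {            # $1 = pid file, $2 = script name to match in ps
  local pidFile=$1 name=$2 pid
  [ -f "$pidFile" ] || { echo " no $pidFile"; return; }
  pid=$(head "$pidFile")
  kill -TERM "$pid"
  for i in 1 2 5; do           # escalating waits before re-sending TERM
    ps ax | grep "$pid" | grep "$name" | grep -v grep >/dev/null || break
    sleep "$i"; kill -TERM "$pid"
  done
  ps ax | grep "$pid" | grep "$name" | grep -v grep >/dev/null && kill -9 "$pid" 2>/dev/null
  rm -f "$pidFile"
}
# e.g. stop_by_pidfile "$CHUKWA_PID_DIR/dbAdmin.pid" dbAdmin.sh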

+ 0 - 28
src/contrib/chukwa/bin/stop-probes.sh

@@ -1,28 +0,0 @@
-#!/usr/bin/env bash
-
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-# Stop probes.  Run this on master node.
-
-bin=`dirname "$0"`
-bin=`cd "$bin"; pwd`
-
-. "$bin"/chukwa-config.sh
-
-"$bin"/chukwa-daemons.sh --config $CHUKWA_CONF_DIR stop systemDataLoader.sh
-"$bin"/chukwa-daemon.sh --config $CHUKWA_CONF_DIR stop torqueDataLoader.sh
-"$bin"/chukwa-daemon.sh --config $CHUKWA_CONF_DIR stop nodeActivityDataLoader.sh

+ 0 - 141
src/contrib/chukwa/bin/systemDataLoader.sh

@@ -1,141 +0,0 @@
-#!/bin/bash
-
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-bin=`dirname "$0"`
-bin=`cd "$bin"; pwd`
-
-. "$bin"/chukwa-config.sh
-
-JVM_OPTS="-Xms4M -Xmx4M"
-
-trap 'shutdown' 1 2 15
-
-function shutdown {
-  echo -n "Shutting down System Data Loader..."
-  if [ -f ${CHUKWA_HOME}/var/run/Sar-data-loader.pid ]; then
-    kill -9 `cat ${CHUKWA_HOME}/var/run/Sar-data-loader.pid`
-  fi
-  if [ -f ${CHUKWA_HOME}/var/run/Iostat-data-loader.pid ]; then
-    kill -9 `cat ${CHUKWA_HOME}/var/run/Iostat-data-loader.pid`
-  fi
-  if [ -f ${CHUKWA_HOME}/var/run/Top-data-loader.pid ]; then
-    kill -9 `cat ${CHUKWA_HOME}/var/run/Top-data-loader.pid`
-  fi
-  if [ -f ${CHUKWA_HOME}/var/run/Df-data-loader.pid ]; then
-    kill -9 `cat ${CHUKWA_HOME}/var/run/Df-data-loader.pid`
-  fi
-  if [ -f ${CHUKWA_HOME}/var/run/Netstat-data-loader.pid ]; then
-    kill -9 `cat ${CHUKWA_HOME}/var/run/Netstat-data-loader.pid`
-  fi
-  rm -f $CHUKWA_HOME/var/run/chukwa-$CHUKWA_IDENT_STRING-systemDataLoader.sh.pid
-  echo "done"
-  exit 0
-}
-
-if [ "X$1" = "Xstop" ]; then
-  echo -n "Shutting down System Data Loader..."
-  if [ -f $CHUKWA_HOME/var/run/chukwa-$CHUKWA_IDENT_STRING-systemDataLoader.sh.pid ]; then
-    kill -TERM `head $CHUKWA_HOME/var/run/chukwa-$CHUKWA_IDENT_STRING-systemDataLoader.sh.pid`
-  fi
-  echo "done"
-  exit 0
-fi
-
-echo -n "Starting System Data Loader..."
-
-#test=`grep -q SysLog ${CHUKWA_HOME}/var/chukwa_checkpoint*`
-#if [ "X${test}"="X1" ]; then
-#  echo "add org.apache.hadoop.chukwa.datacollection.adaptor.filetailer.CharFileTailingAdaptorUTF8NewLineEscaped SysLog 0 /var/log/messages 0" | nc localhost 9093 >&/dev/null & disown -h 
-#fi
-
-EXISTS=0
-pidFile="${CHUKWA_HOME}/var/run/Sar-data-loader.pid"
-if [ -f $pidFile ]; then
-  pid=`head ${pidFile}`
-  ChildPIDRunningStatus=`${JPS} | grep ${pid} | grep Exec | grep -v grep | wc -l`
-  if [ $ChildPIDRunningStatus -ge 1 ]; then
-    EXISTS=1
-  fi
-fi
-
-if [ ${EXISTS} -lt 1 ]; then
-    ${JAVA_HOME}/bin/java $JVM_OPTS -DPERIOD=60 -DCHUKWA_HOME=${CHUKWA_HOME} -DCHUKWA_CONF_DIR=${CHUKWA_CONF_DIR} -DCHUKWA_LOG_DIR=${CHUKWA_LOG_DIR} -DRECORD_TYPE=Sar -Dlog4j.configuration=system-data-loader.properties -classpath ${CLASSPATH}:${CHUKWA_CORE}:${HADOOP_JAR}:${COMMON}:${TOOLS}:${CHUKWA_CONF_DIR} org.apache.hadoop.chukwa.inputtools.plugin.metrics.Exec sar -q -r -n FULL 55 &
-fi
-
-EXISTS=0
-pidFile="${CHUKWA_HOME}/var/run/Iostat-data-loader.pid"
-if [ -f $pidFile ]; then
-  pid=`head ${pidFile}`
-  ChildPIDRunningStatus=`${JPS} | grep ${pid} | grep Exec | grep -v grep | wc -l`
-  if [ $ChildPIDRunningStatus -ge 1 ]; then
-    EXISTS=1
-  fi
-fi
-
-if [ ${EXISTS} -lt 1 ]; then
-  ${JAVA_HOME}/bin/java $JVM_OPTS -DPERIOD=60 -DCHUKWA_HOME=${CHUKWA_HOME} -DCHUKWA_CONF_DIR=${CHUKWA_CONF_DIR} -DCHUKWA_LOG_DIR=${CHUKWA_LOG_DIR} -DRECORD_TYPE=Iostat -Dlog4j.configuration=system-data-loader.properties -classpath ${CLASSPATH}:${CHUKWA_CORE}:${HADOOP_JAR}:${COMMON}:${TOOLS}:${CHUKWA_CONF_DIR} org.apache.hadoop.chukwa.inputtools.plugin.metrics.Exec iostat -x 55 2 &
-fi
-
-EXISTS=0
-pidFile="${CHUKWA_HOME}/var/run/Top-data-loader.pid"
-if [ -f $pidFile ]; then
-  pid=`head ${pidFile}`
-  ChildPIDRunningStatus=`${JPS} | grep ${pid} | grep Exec | grep -v grep | wc -l`
-  if [ $ChildPIDRunningStatus -ge 1 ]; then
-    EXISTS=1
-  fi
-fi
-
-if [ ${EXISTS} -lt 1 ]; then
-  ${JAVA_HOME}/bin/java $JVM_OPTS -DPERIOD=60 -DCHUKWA_HOME=${CHUKWA_HOME} -DCHUKWA_CONF_DIR=${CHUKWA_CONF_DIR} -DCHUKWA_LOG_DIR=${CHUKWA_LOG_DIR} -DRECORD_TYPE=Top -Dlog4j.configuration=system-data-loader.properties -classpath ${CLASSPATH}:${CHUKWA_CORE}:${HADOOP_JAR}:${COMMON}:${TOOLS}:${CHUKWA_CONF_DIR} org.apache.hadoop.chukwa.inputtools.plugin.metrics.Exec top -b -n 1 -c &
-fi
-
-EXISTS=0
-pidFile="${CHUKWA_HOME}/var/run/Df-data-loader.pid"
-if [ -f $pidFile ]; then
-  pid=`head ${pidFile}`
-  ChildPIDRunningStatus=`${JPS} | grep ${pid} | grep Exec | grep -v grep | wc -l`
-  if [ $ChildPIDRunningStatus -ge 1 ]; then
-    EXISTS=1
-  fi
-fi
-
-if [ ${EXISTS} -lt 1 ]; then
-  ${JAVA_HOME}/bin/java $JVM_OPTS -DPERIOD=60 -DCHUKWA_HOME=${CHUKWA_HOME} -DCHUKWA_CONF_DIR=${CHUKWA_CONF_DIR} -DCHUKWA_LOG_DIR=${CHUKWA_LOG_DIR} -DRECORD_TYPE=Df -Dlog4j.configuration=system-data-loader.properties -classpath ${CLASSPATH}:${CHUKWA_CORE}:${HADOOP_JAR}:${COMMON}:${TOOLS}:${CHUKWA_CONF_DIR} org.apache.hadoop.chukwa.inputtools.plugin.metrics.Exec df -l &
-fi
-
-EXISTS=0
-pidFile="${CHUKWA_HOME}/var/run/Netstat-data-loader.pid"
-if [ -f $pidFile ]; then
-  pid=`head ${pidFile}`
-  ChildPIDRunningStatus=`${JPS} | grep ${pid} | grep Exec | grep -v grep | wc -l`
-  if [ $ChildPIDRunningStatus -ge 1 ]; then
-    EXISTS=1
-  fi
-fi
-
-if [ ${EXISTS} -lt 1 ]; then
-  ${JAVA_HOME}/bin/java $JVM_OPTS -DPERIOD=60 -DCHUKWA_HOME=${CHUKWA_HOME} -DCHUKWA_CONF_DIR=${CHUKWA_CONF_DIR} -DCHUKWA_LOG_DIR=${CHUKWA_LOG_DIR} -DRECORD_TYPE=Netstat -Dlog4j.configuration=system-data-loader.properties -classpath ${CLASSPATH}:${CHUKWA_CORE}:${HADOOP_JAR}:${COMMON}:${TOOLS}:${CHUKWA_CONF_DIR} org.apache.hadoop.chukwa.inputtools.plugin.metrics.Exec ${CHUKWA_HOME}/bin/netstat.sh &
-fi
-
-echo "done"
-
-while [ 1 ]
-do
-    # sleep until shutdown signal has been sent.
-    sleep 5
-done
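systemDataLoader.sh repeats the same "pid file present and Exec child still running? otherwise launch" block five times, once per record type. A hedged sketch of how that block reads when written once -- start_loader is an illustrative name; the java invocation mirrors the ones in the script above:

# Sketch only: the repeated pid-check / Exec-launch block expressed as one helper.
start_loader() {               # $1 = record type (Sar, Iostat, ...), rest = command to run
  local type=$1; shift
  local pidFile="${CHUKWA_HOME}/var/run/${type}-data-loader.pid"
  if [ -f "$pidFile" ] && ${JPS} | grep "$(head "$pidFile")" | grep -q Exec; then
    return                     # loader for this record type is already up
  fi
  ${JAVA_HOME}/bin/java $JVM_OPTS -DPERIOD=60 -DCHUKWA_HOME=${CHUKWA_HOME} \
    -DCHUKWA_CONF_DIR=${CHUKWA_CONF_DIR} -DCHUKWA_LOG_DIR=${CHUKWA_LOG_DIR} \
    -DRECORD_TYPE=$type -Dlog4j.configuration=system-data-loader.properties \
    -classpath ${CLASSPATH}:${CHUKWA_CORE}:${HADOOP_JAR}:${COMMON}:${TOOLS}:${CHUKWA_CONF_DIR} \
    org.apache.hadoop.chukwa.inputtools.plugin.metrics.Exec "$@" &
}
# e.g. start_loader Df df -l
#      start_loader Iostat iostat -x 55 2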

+ 0 - 48
src/contrib/chukwa/bin/torqueDataLoader.sh

@@ -1,48 +0,0 @@
-#!/bin/bash
-
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-bin=`dirname "$0"`
-bin=`cd "$bin"; pwd`
-
-. "$bin"/chukwa-config.sh
-
-java=$JAVA_HOME/bin/java
-
-if [ "X$1" = "Xstop" ]; then
-  echo -n "Shutting down Torque Data Loader..."
-  if [ -f ${CHUKWA_HOME}/var/run/TorqueDataLoader.pid ]; then
-    kill -TERM `cat ${CHUKWA_HOME}/var/run/TorqueDataLoader.pid`
-    rm -f ${CHUKWA_HOME}/var/run/TorqueDataLoader.pid
-  fi
-  echo "done"
-  exit 0
-fi
-
-min=`date +%M`
-
-
-# start torque data loader
-pidFile=$CHUKWA_HOME/var/run/TorqueDataLoader.pid
-if [ -f $pidFile ]; then
-  pid=`head ${pidFile}`
-  ChildPIDRunningStatus=`${JPS} | grep ${pid} | grep TorqueDataLoader | grep -v grep | wc -l`
-  if [ $ChildPIDRunningStatus -lt 1 ]; then
-      ${java} -DDOMAIN=${DOMAIN} -DTORQUE_SERVER=${TORQUE_SERVER} -DTORQUE_HOME=${TORQUE_HOME} -DCHUKWA_HOME=${CHUKWA_HOME} -DCHUKWA_CONF_DIR=${CHUKWA_CONF_DIR} -DCHUKWA_LOG_DIR=${CHUKWA_LOG_DIR} -DRECORD_TYPE=Torque -Dlog4j.configuration=system-data-loader.properties -classpath ${CLASSPATH}:${CHUKWA_CORE}:${COMMON}:${HADOOP_JAR}:${CHUKWA_CONF_DIR} org.apache.hadoop.chukwa.inputtools.mdl.TorqueDataLoader&
-  fi 
-else
-      ${java} -DDOMAIN=${DOMAIN} -DTORQUE_SERVER=${TORQUE_SERVER} -DTORQUE_HOME=${TORQUE_HOME} -DCHUKWA_HOME=${CHUKWA_HOME} -DCHUKWA_CONF_DIR=${CHUKWA_CONF_DIR} -DCHUKWA_LOG_DIR=${CHUKWA_LOG_DIR} -DRECORD_TYPE=Torque -Dlog4j.configuration=system-data-loader.properties -classpath ${CLASSPATH}:${CHUKWA_CORE}:${COMMON}:${HADOOP_JAR}:${CHUKWA_CONF_DIR} org.apache.hadoop.chukwa.inputtools.mdl.TorqueDataLoader&
-fi

+ 0 - 48
src/contrib/chukwa/bin/validateDemux.sh

@@ -1,48 +0,0 @@
-#!/bin/sh
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-pid=$$
-
-bin=`dirname "$0"`
-bin=`cd "$bin"; pwd`
-
-. "$bin"/chukwa-config.sh
-
-echo "hadoop jar for agent is " ${HADOOP_JAR}
-now=`date +%Y%m%d_%H_%M%S`
-hdfsDir="/test_$now/"
-
-HADOOP_CMDE="${HADOOP_HOME}/bin/hadoop "
-
-$HADOOP_CMDE dfs -mkdir ${hdfsDir}
-
-echo "Moving data to HDFS: ${hdfsDir}"
-
-$HADOOP_CMDE dfs -put ${CHUKWA_HOME}/data/demuxData ${hdfsDir}/
-
-echo "demuxTestData: "
-$HADOOP_CMDE dfs -ls ${hdfsDir}/demuxData/input
-exitCode=$?
-echo "ls ExitCode: ${exitCode} "
- 
-$HADOOP_CMDE jar  ${CHUKWA_CORE} org.apache.hadoop.chukwa.extraction.demux.Demux -Dmapred.compress.map.output=true -Dmapred.map.output.compression.codec=org.apache.hadoop.io.compress.LzoCodec -Dmapred.output.compress=true -Dmapred.output.compression.type=BLOCK -r 4 ${hdfsDir}/demuxData/input ${hdfsDir}/demuxData/output
-exitCode=$?
-echo "Demux ExitCode: ${exitCode} "
-
-${JAVA_HOME}/bin/java -Xms10M -Xmx32M -classpath /tmp/chukwaTest.jar:${CLASSPATH}:${HADOOP_JAR}:${COMMON} org.apache.hadoop.chukwa.validationframework.DemuxDirectoryValidator -hdfs ${hdfsDir}/demuxData/gold ${hdfsDir}/demuxData/output
-exitCode=$?
-echo "Validation ExitCode: ${exitCode} "
-

+ 0 - 147
src/contrib/chukwa/bin/watchdog.sh

@@ -1,147 +0,0 @@
-#!/bin/bash
-
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-bin=`dirname "$0"`
-bin=`cd "$bin"; pwd`
-
-. "$bin"/chukwa-config.sh
-
-java=$JAVA_HOME/bin/java
-
-
-min=`date +%M`
-
-if [ "$CHUKWA_IDENT_STRING" = "" ]; then
-  export CHUKWA_IDENT_STRING="$USER"
-fi
-
-# monitor agent
-pidFile=$CHUKWA_HOME/var/run/chukwa-$CHUKWA_IDENT_STRING-agent.sh.pid
-if [ -f $pidFile ]; then
-  pid=`head ${pidFile}`
-  ChildPIDRunningStatus=`ps ax | grep ${pid} | grep agent.sh | grep -v grep | wc -l`
-  if [ $ChildPIDRunningStatus -lt 1 ]; then
-      HOSTNAME=`hostname`
-      echo "${HOSTNAME}: agent pid file exists, but process missing.  Restarting agent.sh."
-      "$bin/chukwa-daemon.sh" --config $CHUKWA_CONF_DIR start agent.sh &
-  fi 
-fi
-
-# monitor collector
-pidFile=$CHUKWA_HOME/var/run/chukwa-$CHUKWA_IDENT_STRING-jettyCollector.sh.pid
-if [ -f $pidFile ]; then
-  pid=`head ${pidFile}`
-  ChildPIDRunningStatus=`ps ax | grep ${pid} | grep jettyCollector.sh | grep -v grep | wc -l`
-  if [ $ChildPIDRunningStatus -lt 1 ]; then
-      HOSTNAME=`hostname`
-      echo "${HOSTNAME}: collector pid file exists, but process missing.  Restarting jettyCollector.sh."
-      "$bin/chukwa-daemon.sh" --config $CHUKWA_CONF_DIR start jettyCollector.sh &
-  fi
-fi
-
-# monitor node activity data loader
-pidFile=$CHUKWA_HOME/var/run/PbsNodes-data-loader.pid
-if [ -f $pidFile ]; then
-  pid=`head ${pidFile}`
-  ChildPIDRunningStatus=`${JPS} | grep ^${pid} | grep -v grep | wc -l`
-  if [ $ChildPIDRunningStatus -lt 1 ]; then
-      HOSTNAME=`hostname`
-      echo "${HOSTNAME}: PbsNodes-data-loader pid file exists, but process missing.  Restarting nodeActivityDataLoader.sh."
-      "$bin/chukwa-daemon.sh" --config $CHUKWA_CONF_DIR start nodeActivityDataLoader.sh &
-  fi
-fi
-
-# monitor system data loader
-pidFile=$CHUKWA_HOME/var/run/Df-data-loader.pid
-if [ -f $pidFile ]; then
-  pid=`head ${pidFile}`
-  ChildPIDRunningStatus=`${JPS} | grep ^${pid} | grep -v grep | wc -l`
-  if [ $ChildPIDRunningStatus -lt 1 ]; then
-      HOSTNAME=`hostname`
-      echo "${HOSTNAME}: Df-data-loader pid file exists, but process missing.  Restarting systemDataLoader.sh."
-      "$bin/chukwa-daemon.sh" --config $CHUKWA_CONF_DIR start systemDataLoader.sh &
-  fi
-fi
-
-pidFile=$CHUKWA_HOME/var/run/Iostat-data-loader.pid
-if [ -f $pidFile ]; then
-  pid=`head ${pidFile}`
-  ChildPIDRunningStatus=`${JPS} | grep ^${pid} | grep -v grep | wc -l`
-  if [ $ChildPIDRunningStatus -lt 1 ]; then
-      HOSTNAME=`hostname`
-      echo "${HOSTNAME}: Iostat-data-loader pid file exists, but process missing.  Restarting systemDataLoader.sh."
-      "$bin/chukwa-daemon.sh" --config $CHUKWA_CONF_DIR start systemDataLoader.sh &
-  fi
-fi
-
-pidFile=$CHUKWA_HOME/var/run/Sar-data-loader.pid
-if [ -f $pidFile ]; then
-  pid=`head ${pidFile}`
-  ChildPIDRunningStatus=`${JPS} | grep ^${pid} | grep -v grep | wc -l`
-  if [ $ChildPIDRunningStatus -lt 1 ]; then
-      HOSTNAME=`hostname`
-      echo "${HOSTNAME}: Sar-data-loader pid file exists, but process missing.  Restarting systemDataLoader.sh."
-      "$bin/chukwa-daemon.sh" --config $CHUKWA_CONF_DIR start systemDataLoader.sh &
-  fi
-fi
-
-pidFile=$CHUKWA_HOME/var/run/Top-data-loader.pid
-if [ -f $pidFile ]; then
-  pid=`head ${pidFile}`
-  ChildPIDRunningStatus=`${JPS} | grep ^${pid} | grep -v grep | wc -l`
-  if [ $ChildPIDRunningStatus -lt 1 ]; then
-      HOSTNAME=`hostname`
-      echo "${HOSTNAME}: Top-data-loader pid file exists, but process missing.  Restarting systemDataLoader.sh."
-      "$bin/chukwa-daemon.sh" --config $CHUKWA_CONF_DIR start systemDataLoader.sh &
-  fi
-fi
-
-# monitor torque data loader
-pidFile=$CHUKWA_HOME/var/run/TorqueDataLoader.pid
-if [ -f $pidFile ]; then
-  pid=`head ${pidFile}`
-  ChildPIDRunningStatus=`${JPS} | grep ^${pid} | grep -v grep | wc -l`
-  if [ $ChildPIDRunningStatus -lt 1 ]; then
-      HOSTNAME=`hostname`
-      echo "${HOSTNAME}: pid file exists, but process missing.  Restarting torqueDataLoader.sh."
-      "$bin/chukwa-daemon.sh" --config $CHUKWA_CONF_DIR start torqueDataLoader.sh &
-  fi
-fi
-
-# monitor processSinkFiles.sh
-pidFile=$CHUKWA_HOME/var/run/chukwa-$CHUKWA_IDENT_STRING-processSinkFiles.sh.pid
-if [ -f $pidFile ]; then
-  pid=`head ${pidFile}`
-  ChildPIDRunningStatus=`ps ax | grep ${pid} | grep processSinkFiles.sh | grep -v grep | wc -l`
-  if [ $ChildPIDRunningStatus -lt 1 ]; then
-      HOSTNAME=`hostname`
-      echo "${HOSTNAME}: pid file exists, but process missing.  Restarting processSinkFiles.sh."
-      "$bin/chukwa-daemon.sh" --config $CHUKWA_CONF_DIR start processSinkFiles.sh &
-  fi
-fi
-
-# monitor dbAdmin.sh
-pidFile=$CHUKWA_HOME/var/run/chukwa-$CHUKWA_IDENT_STRING-dbAdmin.sh.pid
-if [ -f $pidFile ]; then
-  pid=`head ${pidFile}`
-  ChildPIDRunningStatus=`ps ax | grep ${pid} | grep dbAdmin.sh | grep -v grep | wc -l`
-  if [ $ChildPIDRunningStatus -lt 1 ]; then
-      HOSTNAME=`hostname`
-      echo "${HOSTNAME}: pid file exists, but process missing.  Restarting dbAdmin.sh."
-      "$bin/chukwa-daemon.sh" --config $CHUKWA_CONF_DIR start dbAdmin.sh &
-  fi
-fi
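watchdog.sh performs the same check nine times: if a pid file exists but the process it names is gone, restart the corresponding script through chukwa-daemon.sh. A hedged sketch of that check written once (the function name and argument order are mine; the ps-based variant is shown, while several checks above use ${JPS} instead):

# Sketch only: the pid-file liveness check and restart repeated above.
check_and_restart() {          # $1 = pid file, $2 = pattern to match in ps, $3 = script to restart
  local pidFile=$1 pattern=$2 script=$3 pid
  [ -f "$pidFile" ] || return 0
  pid=$(head "$pidFile")
  if ! ps ax | grep "$pid" | grep "$pattern" | grep -v grep >/dev/null; then
    echo "`hostname`: $pattern pid file exists, but process missing.  Restarting $script."
    "$bin/chukwa-daemon.sh" --config $CHUKWA_CONF_DIR start "$script" &
  fi
}
# e.g. check_and_restart "$CHUKWA_HOME/var/run/chukwa-$CHUKWA_IDENT_STRING-agent.sh.pid" agent.sh agent.sh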

+ 0 - 851
src/contrib/chukwa/build.xml

@@ -1,851 +0,0 @@
-<?xml version="1.0" ?>
-
-<!--
-   Licensed to the Apache Software Foundation (ASF) under one or more
-   contributor license agreements.  See the NOTICE file distributed with
-   this work for additional information regarding copyright ownership.
-   The ASF licenses this file to You under the Apache License, Version 2.0
-   (the "License"); you may not use this file except in compliance with
-   the License.  You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
--->
-
-<project name="chukwa" default="main" 
-	xmlns:ivy="antlib:org.apache.ivy.ant">
-
-	<property name="name" value="chukwa"/>
-        <property name="chukwaVersion" value="0.1.1"/>
-	<property name="final.name" value="${name}-${chukwaVersion}"/>
-
-        <import file="../build-contrib.xml" optional="true"/>
-        <available file="../build-contrib.xml" property="present"/>
-
-        <property file="build.properties"/>
-        <property file="default.properties"/>
-
-        <condition property="standaloneMode">
-                <and><not><available file="../build-contrib.xml" property="present"/></not></and>
-        </condition>
-
-        <condition property="hadoopMode">
-                <and><available file="../build-contrib.xml" property="present"/></and>
-        </condition>
-
-        <target name="init-standalone" depends="ivy-retrieve" if="standaloneMode">
-          <echo>Standalone Mode</echo>
-          <property environment="env"/>
-	  <property name="basedir" value="."/>
-          <!--SET UP HADOOP JARS-->
-          <property name="hadoop.home.dir" value="${env.HADOOP_HOME}"/>
-          <echo message="HADOOP_HOME set to: ${hadoop.home.dir}"/>
-          <property name="hadoop.default.jars.dir" value="${basedir}/hadoopjars"/>
-
-          <condition property="hadoop.home.jars.dir" value="${hadoop.home.dir}/build" else="${basedir}/hadoopjars">
-                <available file="${hadoop.home.dir}/build"/>
-          </condition>
-          <echo message="hadoop.home.jars.dir set to ${hadoop.home.jars.dir}"/>
-
-          <property name="build.dir" value="${basedir}/build"/>
-          <property name="build.classes" value="${basedir}/build/classes"/>
-          <property name="test.build.dir" value="${build.dir}/test"/>
-          <property name="test.build.classes" value="${test.build.dir}/classes"/>
-          <property name="lib.dir" value="${basedir}/lib"/>
-          <path id="classpath">
-                  <fileset dir="${lib.dir}">
-                          <include name="**/*.jar" />
-                          <exclude name="**/excluded/" />
-                  </fileset>
-                  <fileset dir="${hadoop.default.jars.dir}">   <!-- ASR -->
-                          <include name="**/*.jar" />
-                          <exclude name="**/*core*.jar" />
-                  </fileset>
-                  <fileset dir="${hadoop.home.jars.dir}">   <!-- ASR -->
-                          <include name="**/*core*.jar" />
-                  </fileset>
-                  <path refid="contrib-classpath"/>
-          </path>
-          <path id="testClasspath">
-                  <pathelement location="${build.classes}"/>
-                  <pathelement location="${test.build.classes}"/>
-                  <fileset dir="${lib.dir}">
-                          <include name="**/*.jar" />
-                          <exclude name="**/excluded/" />
-                  </fileset>
-                  <fileset dir="${hadoop.default.jars.dir}">   <!-- ASR -->
-                          <include name="**/*.jar" />
-                          <exclude name="**/*core*.jar" />
-                  </fileset>
-                  <fileset dir="${hadoop.home.jars.dir}">   <!-- ASR -->
-                          <include name="**/*core*.jar" />
-                  </fileset>
-                  <path refid="contrib-classpath"/>
-          </path>
-          <path id="testDemuxClasspath">
-                  <pathelement location="${build.classes}"/>
-                  <pathelement location="${test.build.classes}"/>
-                  <fileset dir="${hadoop.jar}">
-                          <include name="**/*.jar" />
-                          <exclude name="**/excluded/" />
-                  </fileset>
-                  <fileset dir="${lib.dir}">
-                          <include name="**/*.jar" />
-                          <exclude name="**/excluded/" />
-                  </fileset>
-                  <path refid="contrib-classpath"/>
-          </path>
-        </target>
-
-	<target name="ivy-init-properties-local" description="to initiate ivy properties">
-		<property name="ivy.dir" location="ivy" />
-	  	<property name="ivysettings.xml" location="${ivy.dir}/ivysettings.xml"/>
-	  	<loadproperties srcfile="${ivy.dir}/libraries.properties"/>
-	  	<loadproperties srcfile="${ivy.dir}/libraries.properties"/>
-	  	<property name="ivy.jar" location="${ivy.dir}/ivy-${ivy.version}.jar"/>
-                <property name="ivy_repo_url" 
-			  value="http://repo2.maven.org/maven2/org/apache/ivy/ivy/${ivy.version}/ivy-${ivy.version}.jar" />
-	  	<property name="build.dir" location="build" />
-	  	<property name="build.ivy.dir" location="${build.dir}/ivy" />
-	  	<property name="build.ivy.lib.dir" location="${build.ivy.dir}/lib" />
-		<property name="build.ivy.report.dir" location="${build.ivy.dir}/report" />
-	  	<property name="common.ivy.lib.dir" location="${build.ivy.lib.dir}/${ant.project.name}/common"/> 
-
-	  	<!--this is the naming policy for artifacts we want pulled down-->
-	  	<property name="ivy.artifact.retrieve.pattern"
-    			value="${ant.project.name}/[conf]/[artifact]-[revision].[ext]"/>
-	</target>  
-
-        <target name="ivy-download-local" description="To download ivy"
-		unless="offline">
-    		<get src="${ivy_repo_url}" dest="${ivy.jar}" usetimestamp="true"/>
-	</target>
-
-	<target name="ivy-init-dirs-local" depends="ivy-init-properties-local">
-    		<mkdir dir="${build.ivy.dir}" />
-    		<mkdir dir="${build.ivy.lib.dir}" />
-    		<mkdir dir="${build.ivy.report.dir}" />
-	</target>
-
-  	<target name="ivy-probe-antlib-local" >
-    		<condition property="ivy.found.local">
-      			<typefound uri="antlib:org.apache.ivy.ant" name="cleancache"/>
-    		</condition>
-	</target>
-
-	<target name="ivy-init-antlib-local" depends="ivy-init-dirs-local,ivy-download-local,ivy-probe-antlib-local" unless="ivy.found.local">
-    		<typedef uri="antlib:org.apache.ivy.ant" onerror="fail" loaderRef="ivyLoader">
-			<classpath>
-        			<pathelement location="${ivy.jar}"/>
-		      	</classpath>
-    		</typedef>
-    		<fail>
-      		<condition>
-		        <not>
-          			<typefound uri="antlib:org.apache.ivy.ant" name="cleancache"/>
-		        </not>
-		</condition>
-			      You need Apache Ivy 2.0 or later from http://ant.apache.org/
-			      It could not be loaded from ${ivy_repo_url}
-		 </fail>
-	</target>
-
-  	<target name="ivy-init-local" depends="ivy-init-antlib-local">
-    		<ivy:configure settingsid="${ant.project.name}.ivy.settings" file="${ivysettings.xml}" override="true"/>
-	</target>
-
-	<target name="ivy-resolve" depends="ivy-init-local">
-    		<ivy:resolve settingsRef="${ant.project.name}.ivy.settings" conf="common"/>
-  	</target>
-
-	<target name="ivy-retrieve" depends="ivy-resolve"
-		    description="Retrieve Ivy-managed artifacts for the compile/test configurations">
-		<ivy:retrieve settingsRef="${ant.project.name}.ivy.settings" 
-		 pattern="${build.ivy.lib.dir}/${ivy.artifact.retrieve.pattern}" sync="true" />
-		<ivy:cachepath pathid="contrib-classpath" conf="common" />
-	</target>
-        
-        <target name="init-hadoop" if="hadoopMode">
-		<antcall target="ivy-retrieve-common"/>
-                <echo>Hadoop Mode</echo>
-	        <property name="build.dir" value="${basedir}/build"/>
-	        <property name="build.classes" value="${basedir}/build/classes"/>
-                <property name="test.build.dir" value="${build.dir}/test"/>
-                <property name="test.build.classes" value="${test.build.dir}/classes"/>
-                <delete file="${build.dir}/${final.name}/hadoop-*-core.jar" />
-	        <property name="lib.dir" value="${basedir}/lib"/>
-	        <path id="classpath">
-		        <fileset dir="${lib.dir}">
-			        <include name="**/*.jar" />
-			        <exclude name="**/excluded/" />
-		        </fileset>
-		        <fileset dir="${hadoop.root}/lib">
-			        <include name="**/*.jar" />
-			        <exclude name="**/excluded/" />
-		        </fileset>
-                        <pathelement location="${hadoop.root}/build/classes"/>
- 			<path refid="contrib-classpath"/>
-	        </path>
-                <path id="testClasspath">
-                        <pathelement location="${build.classes}"/>
-                        <pathelement location="${test.build.classes}"/>
- 			<path refid="contrib-classpath"/>
-                        <fileset dir="${lib.dir}">
-                                <include name="**/*.jar" />
-                                <exclude name="**/excluded/" />
-                        </fileset>
-		        <fileset dir="${hadoop.root}/lib">
-			        <include name="**/*.jar" />
-                                <exclude name="**/excluded/" />
-                        </fileset>
-                        <pathelement location="${hadoop.root}/build/classes"/>
-                        <pathelement location="${hadoop.root}/build/test/classes"/>
-                </path>
-
-                <path id="testDemuxClasspath">
-                        <pathelement location="${build.classes}"/>
-                        <pathelement location="${test.build.classes}"/>
- 			<path refid="contrib-classpath"/>
-                        <fileset dir="${hadoop.root}/lib">
-                                <include name="**/*.jar" />
-                                <exclude name="**/excluded/" />
-                        </fileset>
-                        <fileset dir="${lib.dir}">
-                                <include name="**/*.jar" />
-                                <exclude name="**/excluded/" />
-                        </fileset>
-                        <pathelement location="${hadoop.root}/build/classes"/>
-                        <pathelement location="${hadoop.root}/build/test/classes"/>
-                </path>
-
-        </target>
-
-	<target name="init" depends="init-standalone,init-hadoop">
-                <property name="src.dir" value="${basedir}/src"/>
-                <property name="build.classes" value="${build.dir}/classes"/>
-                <property name="conf.dir" value="${basedir}/conf"/>
-                <property name="docs.dir" value="${basedir}/docs"/>
-                <property name="tools.dir" value="${basedir}/tools"/>
-                <property name="dist.dir" value="${basedir}/dist"/>
-                <property name="opt.dir" value="${basedir}/opt"/>
-                <property name="javac.debug" value="on"/>
-                <property name="javac.version" value="1.6"/>
-                <property name="test.src.dir" value="${basedir}/src/test"/>
-                <property name="test.lib.dir" value="${basedir}/src/test/lib"/>
-                <property name="test.build.dir" value="${build.dir}/test"/>
-                <property name="test.generated.dir" value="${test.build.dir}/src"/>
-                <property name="test.build.data" value="${test.build.dir}/data"/>
-                <property name="test.cache.data" value="${test.build.dir}/cache"/>
-                <property name="test.debug.data" value="${test.build.dir}/debug"/>
-                <property name="test.log.dir" value="${test.build.dir}/logs"/>
-                <property name="test.build.classes" value="${test.build.dir}/classes"/>
-                <property name="test.build.testjar" value="${test.build.dir}/testjar"/>
-                <property name="test.include" value="Test*"/>
-                <property name="test.classpath.id" value="test.classpath"/>
-                <property name="test.output" value="yes"/>
-                <!--<property name="test.timeout" value="900000"/> -->
-                <property name="test.timeout" value="5000"/>
-                <property name="test.junit.output.format" value="plain"/>
-                <property name="test.junit.fork.mode" value="perTest" />
-                <property name="test.junit.printsummary" value="yes" />
-                <property name="test.junit.haltonfailure" value="yes" />
-                <property name="test.junit.maxmemory" value="256m" />
-		<mkdir dir="${build.dir}"/>
-		<mkdir dir="${build.classes}"/>
-		<mkdir dir="${build.dir}/test"/>
-		<exec executable="echo" output="${basedir}/bin/VERSION">
-			<arg line="${chukwaVersion}" />
-		</exec>
-
-		<mkdir dir="${dist.dir}"/>
-
-	</target>
-
-	<target name="main" depends="init, compile, compress" description="Main target">
-		<echo>
-            Building the .jar files.
-        </echo>
-	</target>
-
-	<target name="compile" depends="init" description="Compilation target">
-		<mkdir dir="${build.dir}"/>
-		<mkdir dir="${build.classes}"/>
-		<mkdir dir="${build.dir}/test"/>
-		<javac srcdir="src/java/org/apache/hadoop" destdir="${build.classes}" excludes="**/ChukwaTTInstru.java" debug="${javac.debug}">
-			<classpath refid="classpath" />
-		</javac>
-	</target>
-
-	<target name="compile-test" depends="init" description="Test target">
-	                      
-           <delete dir="${test.build.dir}"/>
-           <mkdir dir="${test.build.dir}"/>
-           <delete dir="${test.log.dir}"/>
-           <mkdir dir="${test.log.dir}"/>
-           <delete dir="${test.build.classes}"/>
-           <mkdir dir="${test.build.classes}"/>
-
-            <javac srcdir="${test.src.dir}/org/apache/hadoop/chukwa" destdir="${test.build.dir}/classes"  debug="${javac.debug}">
-                 <classpath refid="testClasspath" />
-                 <classpath refid="testDemuxClasspath" />
-            </javac>
-        </target>
-
-            <!--printsummary="${test.junit.printsummary}" -->
-
-	<target name="test-chukwa" depends="compile,compile-test" description="Run Chukwa unit tests">
-          <mkdir dir="${basedir}/var"/>
-          <junit showoutput="yes"
-            fork="yes"
-            printsummary="withOutAndErr"
-            forkmode="${test.junit.fork.mode}"
-            maxmemory="${test.junit.maxmemory}"
-            dir="${test.build.dir}/classes/" timeout="${test.timeout}"
-            errorProperty="tests.failed" failureProperty="tests.failed">
-           <classpath refid="testClasspath"/>
-           <sysproperty key="test.src.dir" value="${test.src.dir}"/>
-           <formatter type="${test.junit.output.format}" />
-           <batchtest todir="${test.build.dir}" unless="testcase">
-            <fileset dir="${test.src.dir}">
-              <include name="**/${test.include}.java"/>
-              <exclude name="**/${test.exclude}.java"/>
-            </fileset>
-           </batchtest>
-           <batchtest todir="${test.build.dir}" if="testcase">
-            <fileset dir="${test.src.dir}" includes="**/${testcase}.java"/>
-           </batchtest>
-         </junit>
-         <fail if="tests.failed">Tests failed!</fail>
-         <delete>
-            <fileset dir="${basedir}/var" includes="*"/>
-         </delete>
-        </target>
-
-	<target name="collector" depends="compile" description="Prepare collector.war">
-		<mkdir dir="${build.dir}/collector"/>
-		<mkdir dir="${build.dir}/collector/WEB-INF"/>
-		<mkdir dir="${build.dir}/collector/WEB-INF/classes"/>
-		<mkdir dir="${build.dir}/collector/WEB-INF/lib"/>
-		<mkdir dir="${build.dir}/collector/META-INF"/>
-		<copy todir="${build.dir}/collector/WEB-INF/classes">
-			<fileset dir="${build.classes}">
-				<include name="org/apache/hadoop/chukwa/conf/**/*.class" />
-			</fileset>
-			<fileset dir="${build.classes}">
-				<include name="org/apache/hadoop/chukwa/datacollection/**/*.class" />
-			</fileset>
-			<fileset dir="${build.classes}">
-				<include name="org/apache/hadoop/chukwa/extraction/**/*.class" />
-			</fileset>
-			<fileset dir="${build.classes}">
-				<include name="org/apache/hadoop/chukwa/util/**/*.class" />
-			</fileset>
-			<fileset dir="${basedir}/src/java">
-				<include name="org/apache/hadoop/chukwa/conf/**/*.java" />
-			</fileset>
-			<fileset dir="${basedir}/src/java">
-				<include name="org/apache/hadoop/chukwa/datacollection/**/*.java" />
-			</fileset>
-			<fileset dir="${basedir}/src/java">
-				<include name="org/apache/hadoop/chukwa/extraction/**/*.java" />
-			</fileset>
-			<fileset dir="${basedir}/src/java">
-				<include name="org/apache/hadoop/chukwa/util/**/*.java" />
-			</fileset>
-		</copy>
-
-		<copy todir="${build.dir}/collector">
-			<fileset dir="${basedir}/src/web/collector">
-				<include name="**" />
-			</fileset>
-		</copy>
-
-		<copy todir="${build.dir}/collector/WEB-INF/lib">
-			<fileset dir="${basedir}/lib">
-				<include name="log4j-${log4j.version}.jar" />
-				<include name="${build.dir}/${final.name}-core.jar" />
-			</fileset>
-		</copy>
-	</target>
-
-	<target name="collector_jar" depends="compile, collector" description="Create collector war">
-		<jar jarfile="${build.dir}/collector-${chukwaVersion}.war" basedir="${build.dir}/collector">
-			<fileset dir="${build.dir}/collector" includes="**" />
-		</jar>
-	</target>
-
-	<target name="tools_jar" depends="compile, collector" description="Create tools jar">
-		<jar jarfile="${build.dir}/tools-${chukwaVersion}.jar" basedir="${build.classes}" includes="org/apache/hadoop/chukwa/inputtools/**/*.class">
-			<fileset dir="${basedir}/src/java">
-				<include name="org/apache/hadoop/chukwa/inputtools/**/*.java"/>
-			</fileset>
-		</jar>
-	</target>
-
-	<target name="agent_jar" depends="compile, collector" description="Create agent jar">
-		<jar jarfile="${build.dir}/chukwa-agent-${chukwaVersion}.jar" basedir="${build.classes}" includes="org/apache/hadoop/chukwa/client/**/*.class" >
-			<fileset dir="${build.dir}">
-				<include name="org/apache/hadoop/chukwa/conf/**/*.class"/>
-			</fileset>
-			<fileset dir="${build.dir}">
-				<include name="org/apache/hadoop/chukwa/util/**/*.class"/>
-			</fileset>
-			<fileset dir="${build.dir}">
-				<include name="org/apache/hadoop/chukwa/inputtools/**/*.class"/>
-			</fileset>
-			<fileset dir="${build.dir}">
-				<include name="org/apache/hadoop/chukwa/datacollection/**/*.class"/>
-			</fileset>
-
-			<fileset dir="${basedir}/src/java">
-				<include name="org/apache/hadoop/chukwa/client/**/*.java"/>
-			</fileset>
-			<fileset dir="${basedir}/src/java">
-				<include name="org/apache/hadoop/chukwa/util/**/*.java"/>
-			</fileset>
-			<fileset dir="${basedir}/src/java">
-				<include name="org/apache/hadoop/chukwa/inputtools/**/*.java"/>
-			</fileset>
-			<fileset dir="${basedir}/src/java">
-				<include name="org/apache/hadoop/chukwa/datacollection/**/*.java"/>
-				<exclude name="org/apache/hadoop/chukwa/datacollection/collector/**/*.java"/>
-				<exclude name="org/apache/hadoop/chukwa/datacollection/writer/**/*.java"/>
-			</fileset>
-
-		</jar>
-	</target>
-
-	<target name="chukwa_jar" depends="compile, collector" description="Create chukwa-core jar">
-		<jar jarfile="${build.dir}/chukwa-core-${chukwaVersion}.jar" basedir="${build.classes}" includes="org/apache/hadoop/chukwa/datacollection/**/*.class" >
-                        <manifest>
-                            <section name="org/apache/hadoop/chukwa">
-                                <attribute name="Implementation-Title" value="Chukwa"/>
-                                <attribute name="Implementation-Version" value="${version}"/>
-                                <attribute name="Implementation-Vendor" value="Apache"/>
-                            </section>
-                        </manifest>
-
-			<fileset dir="${build.classes}">
-				<include name="org/apache/hadoop/chukwa/**/*.class"/>
-			</fileset>
-			<fileset dir="${basedir}/src/java">
-				<include name="org/apache/hadoop/chukwa/**/*.java"/>
-			</fileset>
-		</jar>
-	</target>
-
-	<target name="chukwa-hadoop_jar" depends="compile" description="Create the chukwa-hadoop client jar for getting hadoop to use chukwa">
-
-		<jar jarfile="${build.dir}/chukwa-hadoop-${chukwaVersion}-client.jar" basedir="${build.classes}" includes="org/apache/hadoop/chukwa/inputtools/log4j/**/*.class">
-			<fileset dir="${basedir}/src/java">
-				<include name="org/apache/hadoop/mapred/**/*.java"/>
-				<include name="org/apache/hadoop/chukwa/inputtools/log4j/**/*.java"/>
-		                <include name="org/apache/hadoop/chukwa/datacollection/client/**/*.java"/>
-		                <include name="org/apache/hadoop/chukwa/util/**/*.java"/>
-			</fileset>
-			<fileset dir="${basedir}/conf">
-		                <include name="chukwa-hadoop-metrics-log4j.properties"/>
-			</fileset>
-			<fileset dir="${build.classes}">
-				<include name="org/apache/hadoop/mapred/**/*.class"/>
-				<include name="org/apache/hadoop/chukwa/datacollection/client/**/*.class"/>
-				<include name="org/apache/hadoop/chukwa/util/**/*.class"/>
-				<include name="org/apache/hadoop/chukwa/datacollection/controller/*.class"/>
-			</fileset>
-		</jar>
-	</target>
-
-        <target name="hicc" depends="compile, chukwa_jar, package-hadoop, package-standalone" description="Prepare hicc.war">
-                <mkdir dir="${build.dir}/hicc"/>
-                <mkdir dir="${build.dir}/hicc/WEB-INF"/>
-                <mkdir dir="${build.dir}/hicc/WEB-INF/classes"/>
-                <mkdir dir="${build.dir}/hicc/WEB-INF/lib"/>
-                <mkdir dir="${build.dir}/hicc/META-INF"/>
-                <copy todir="${build.dir}/hicc">
-                        <fileset dir="${basedir}/src/web/hicc">
-                                <include name="**" />
-                        </fileset>
-                </copy>
-                <copy todir="${build.dir}/hicc/WEB-INF/classes">
-                        <fileset dir="${build.classes}">
-                                <include name="org/apache/hadoop/chukwa/hicc/**/*.class" />
-                        </fileset>
-                        <fileset dir="${build.classes}">
-                                <include name="org/apache/hadoop/chukwa/conf/**/*.class" />
-                        </fileset>
-                        <fileset dir="${build.classes}">
-                                <include name="org/apache/hadoop/chukwa/datacollection/**/*.class" />
-                        </fileset>
-                        <fileset dir="${build.classes}">
-                                <include name="org/apache/hadoop/chukwa/extraction/engine/**/*.class" />
-                        </fileset>
-                        <fileset dir="${build.classes}">
-                                <include name="org/apache/hadoop/chukwa/inputtools/*.class" />
-                        </fileset>
-                        <fileset dir="${build.classes}">
-                                <include name="org/apache/hadoop/chukwa/util/**/*.class" />
-                        </fileset>
-
-                        <fileset dir="${basedir}/src/java">
-                                <include name="org/apache/hadoop/chukwa/hicc/**/*.java" />
-                        </fileset>
-                        <fileset dir="${basedir}/src/java">
-                                <include name="org/apache/hadoop/chukwa/conf/**/*.java" />
-                        </fileset>
-                        <fileset dir="${basedir}/src/java">
-                                <include name="org/apache/hadoop/chukwa/datacollection/**/*.java" />
-                        </fileset>
-                        <fileset dir="${basedir}/src/java">
-                                <include name="org/apache/hadoop/chukwa/extraction/engine/**/*.java" />
-                        </fileset>
-                        <fileset dir="${basedir}/src/java">
-                                <include name="org/apache/hadoop/chukwa/inputtools/*.java" />
-                        </fileset>
-                        <fileset dir="${basedir}/src/java">
-                                <include name="org/apache/hadoop/chukwa/util/**/*.java" />
-                        </fileset>
-
-                </copy>
-                <copy todir="${build.dir}/hicc/WEB-INF/lib">
-                        <fileset dir="${basedir}/lib">
-                                <include name="json.jar" />
-                                <include name="jstl.jar" />
-                                <include name="servlet.jar" />
-                                <include name="taglibs.jar" />
-                                <include name="commons-fileupload-*.jar" />
-                                <include name="commons-httpclient-*.jar" />
-                                <include name="commons-logging-adapters-*.jar" />
-                                <include name="commons-io-*.jar" />
-                                <include name="commons-logging-api-*.jar" />
-                                <include name="commons-logging.jar" />
-                                <include name="log4j-*.jar" />
-                        </fileset>
-                        <fileset dir="${build.dir}">
-                                <include name="${name}-core-${chukwaVersion}.jar" />
-                        </fileset>
-                        <fileset dir="${build.dir}/${final.name}/hadoopjars">
-                                <include name="*.jar" />
-                                <exclude name="jasper-*.jar" />
-                                <exclude name="jetty-*.jar" />
-                                <exclude name="jsp-api.jar" />
-                        </fileset>
-                </copy>
-        </target>
-
-        <target name="hicc_war" depends="compile, hicc" description="Create hicc war">
-                <jar jarfile="${build.dir}/hicc.war" basedir="${build.dir}/hicc">
-                        <fileset dir="${build.dir}/hicc" includes="**" />
-                </jar>
-        </target>
-
-	<target name="compress" depends="compile,collector,collector_jar,hicc_war,tools_jar,agent_jar,chukwa_jar,chukwa-hadoop_jar" description="Compression target">
-		<copy todir="." includeEmptyDirs="false">
-			<fileset dir="${build.dir}">
-				<exclude name="**" />
-				<include name="**/*.jar" />
-			</fileset>
-		</copy>
-	</target>
-
-	<target name="test" depends="compile,compile-test,test-chukwa" description="Automated Test Framework">
-	</target>
-
-        <!-- ================================================================== -->
-        <!-- Clean.  Delete the build files, and their directories              -->
-        <!-- ================================================================== -->
-        <target name="clean" depends="init" description="Clean.  Delete the build files, and their directories">
-            <delete dir="${build.dir}"/>
-            <delete dir="${build.ivy.lib.dir}/${ant.project.home}"/>
-            <delete dir="build"/>
-            <delete dir="${docs.src}/build"/>
-        </target>
-	<!-- ====================================================== -->
-	<!-- Macro definitions                                      -->
-	<!-- ====================================================== -->
-	<macrodef name="macro_tar" description="Worker Macro for tar">
-		<attribute name="param.destfile"/>
-		<element name="param.listofitems"/>
-		<sequential>
-			<tar compression="gzip" longfile="gnu"
-          destfile="@{param.destfile}">
-				<param.listofitems/>
-			</tar>
-		</sequential>
-	</macrodef>
-
-	<!-- ================================================================== -->
-	<!-- D I S T R I B U T I O N                                            -->
-	<!-- ================================================================== -->
-	<!--                                                                    -->
-	<!-- ================================================================== -->
-
-        <target name="package-hadoop" description="package hadoop from hadoop source" if="hadoopMode">
-		<mkdir dir="${build.dir}/${final.name}/hadoopjars"/>
-                <copy toDir="${build.dir}/${final.name}/hadoopjars">
-                        <fileset dir="${hadoop.root}/build">
-                                <include name="hadoop-*-core.jar" />
-                        </fileset>
-			<fileset dir="${build.ivy.lib.dir}/Hadoop/common">
-                                <include name="jetty-*.jar" />
-                                <include name="commons-httpclient-*.jar" />
-                                <include name="commons-logging-*.jar" />
-                                <include name="commons-net-*.jar" />
-                                <include name="jasper-*.jar" />
-                                <include name="commons-el.jar" />
-                                <include name="commons-codec-*.jar" />
-			</fileset>
-			<fileset dir="lib">
-                                <include name="jsp-api.jar"/>
-			</fileset>
-                        <fileset dir="${hadoop.root}/lib">
-                                <include name="commons-cli-*.jar"/>
-                        </fileset>
-                </copy>
-        </target>
-
-        <target name="package-standalone" description="package the bundled hadoop jars for standalone mode" if="standaloneMode">
-		<mkdir dir="${build.dir}/${final.name}/hadoopjars"/>
-		<copy todir="${build.dir}/${final.name}/hadoopjars" includeEmptyDirs="false">
-			<fileset dir="${basedir}/hadoopjars">
-				<include name="**/*.jar" />
-			</fileset>
-		</copy>
-        </target>
-
-	<target name="package" depends="compress,package-hadoop,package-standalone" description="Build distribution">
-		<mkdir dir="${build.dir}/${final.name}"/>
-		<mkdir dir="${build.dir}/${final.name}/lib"/>
-		<mkdir dir="${build.dir}/${final.name}/bin"/>
-		<mkdir dir="${build.dir}/${final.name}/opt"/>
-		<mkdir dir="${build.dir}/${final.name}/tools"/>
-		<mkdir dir="${build.dir}/${final.name}/var"/>
-		<mkdir dir="${build.dir}/${final.name}/var/run"/>
-		<mkdir dir="${build.dir}/${final.name}/logs"/>
-
-		<copy todir="${build.dir}/${final.name}" includeEmptyDirs="false">
-			<fileset dir="${build.dir}">
-				<include name="**/*.jar" />
-				<include name="**/*.war" />
-			</fileset>
-		</copy>
-		<copy todir="${build.dir}/${final.name}/lib" includeEmptyDirs="false">
-			<fileset dir="lib">
-				<exclude name="**/native/**"/>
-			</fileset>
-			<fileset dir="${common.ivy.lib.dir}">
-				<include name="*.jar"/>
-			</fileset>
-		</copy>
-
-		<exec dir="${dist.dir}" executable="sh" failonerror="true">
-			<env key="BASE_NATIVE_LIB_DIR" value="${lib.dir}/native"/>
-			<env key="BUILD_NATIVE_DIR" value="${build.dir}/native"/>
-			<env key="DIST_LIB_DIR" value="${dist.dir}/lib/native"/>
-		</exec>
-
-		<copy todir="${build.dir}/${final.name}/bin">
-			<fileset dir="${basedir}/bin"/>
-		</copy>
-
-		<copy todir="${build.dir}/${final.name}/conf">
-			<fileset dir="${basedir}/conf"/>
-		</copy>
-
-		<copy todir="${build.dir}/${final.name}/docs">
-			<fileset dir="${docs.dir}" />
-		</copy>
-
-		<copy todir="${build.dir}/${final.name}/opt">
-			<fileset dir="${opt.dir}" />
-		</copy>
-
-		<copy todir="${build.dir}/${final.name}/tools">
-			<fileset dir="${tools.dir}" />
-		</copy>
-
-		<chmod perm="ugo+x" type="file" parallel="false">
-			<fileset dir="${build.dir}/${final.name}/tools">
-				<include name="**" />
-			</fileset>
-		</chmod>
-
-		<copy todir="${build.dir}/${final.name}">
-			<fileset dir=".">
-				<include name="*.txt" />
-			</fileset>
-		</copy>
-
-		<copy todir="${build.dir}/${final.name}/src" includeEmptyDirs="true">
-			<fileset dir="src" excludes="**/*.template **/docs/build/**/*"/>
-		</copy>
-
-		<copy todir="${build.dir}/${final.name}" file="build.xml"/>
-
-	</target>
-
-	<!-- ================================================================== -->
-	<!-- Make release tarball                                               -->
-	<!-- ================================================================== -->
-	<target name="tar" depends="package" description="Make release tarball">
-		<macro_tar param.destfile="${build.dir}/${final.name}.tar.gz">
-			<param.listofitems>
-				<tarfileset dir="${build.dir}" mode="664">
-					<exclude name="${final.name}/bin/*" />
-					<exclude name="${final.name}/org/*" />
-					<exclude name="${final.name}/collector/**" />
-					<exclude name="${final.name}/${final.name}/**" />
-					<exclude name="${final.name}/tools/**" />
-					<exclude name="${final.name}/hicc/*" />
-					<include name="${final.name}/**" />
-				</tarfileset>
-				<tarfileset dir="${build.dir}" mode="755">
-					<include name="${final.name}/bin/*" />
-					<include name="${final.name}/tools/**" />
-				</tarfileset>
-			</param.listofitems>
-		</macro_tar>
-	</target>
-
-	<target name="binary" depends="package" description="Make tarball without source and documentation">
-		<macro_tar param.destfile="${build.dir}/${final.name}-bin.tar.gz">
-			<param.listofitems>
-				<tarfileset dir="${build.dir}" mode="664">
-					<exclude name="${final.name}/bin/*" />
-					<exclude name="${final.name}/org/*" />
-					<exclude name="${final.name}/collector/**" />
-					<exclude name="${final.name}/${final.name}/**" />
-					<exclude name="${final.name}/tools/**" />
-					<exclude name="${final.name}/hicc/*" />
-					<exclude name="${final.name}/src/**" />
-					<exclude name="${final.name}/docs/**" />
-					<include name="${final.name}/**" />
-				</tarfileset>
-				<tarfileset dir="${build.dir}" mode="755">
-					<include name="${final.name}/bin/*" />
-					<include name="${final.name}/tools/**" />
-				</tarfileset>
-			</param.listofitems>
-		</macro_tar>
-	</target>
-
-        <target name="rpm" depends="clean,tar" description="Make release rpm">
-		<mkdir dir="${build.dir}/BUILD"/>
-		<mkdir dir="${build.dir}/RPMS"/>
-		<mkdir dir="${build.dir}/SOURCES"/>
-		<mkdir dir="${build.dir}/SPECS"/>
-		<copy todir="${build.dir}/SOURCES">
-			<fileset dir="${build.dir}">
-			        <include name="${final.name}.tar.gz" />
-                        </fileset>
-                </copy>
-                <echo file="${build.dir}/SPECS/chukwa.spec" append="false">
-# RPM Spec file for Chukwa v.${chukwaVersion}
-
-%define _topdir         ${build.dir}
-%define _prefix         ${rpm.prefix}
-%define uid             ${rpm.uid}
-%define gid             ${rpm.gid}
-%define name            chukwa
-%define summary         Distributed Computing Monitoring Framework.
-%define version         ${chukwaVersion}
-%define release         1
-%define license         ASF 2.0
-%define group           Development/Monitoring
-%define source          %{name}-%{version}.tar.gz
-%define vendor          Apache Software Foundation
-%define packager        Eric Yang
-%define buildroot       %{_topdir}/BUILD
-
-Name:      %{name}
-Version:   %{version}
-Release:   %{release}
-Packager:  %{packager}
-Vendor:    %{vendor}
-License:   %{license}
-Summary:   %{summary}
-Group:     %{group}
-Source0:   %{source}
-Prefix:    %{_prefix}
-Buildroot: %{buildroot}
-
-%description
-Chukwa is the monitoring framework for large scale distributed
-clusters.
-
-%prep
-%setup -q
-%build
-mkdir -p %{buildroot}%{_prefix}
-if [ -d %{buildroot}%{_prefix} ]; then
-    rm -rf %{buildroot}%{_prefix}
-fi
-mv %{buildroot}/%{name}-%{version} %{buildroot}%{_prefix}
-cd %{buildroot}%{_prefix}
-mkdir -p %{buildroot}/etc/init.d
-cat %{buildroot}%{_prefix}/tools/init.d/chukwa-data-processors | \
-sed 's:- chukwa -c:- %{uid} -c:' | \
-sed 's:CHUKWA_HOME=/usr/local:CHUKWA_HOME=%{_prefix}:' | \
-sed 's:CHUKWA_CONF_DIR=/usr/local/chukwa/conf:CHUKWA_CONF_DIR=%{_conf_dir}:' > %{buildroot}/etc/init.d/chukwa-data-processors
-cat %{buildroot}%{_prefix}/tools/init.d/chukwa-collector | \
-sed 's:- chukwa -c:- %{uid} -c:' | \
-sed 's:CHUKWA_HOME=/usr/local:CHUKWA_HOME=%{_prefix}:' | \
-sed 's:CHUKWA_CONF_DIR=/usr/local/chukwa/conf:CHUKWA_CONF_DIR=%{_conf_dir}:' > %{buildroot}/etc/init.d/chukwa-collector
-cat %{buildroot}%{_prefix}/tools/init.d/chukwa-agent | \
-sed 's:- chukwa -c:- %{uid} -c:' | \
-sed 's:CHUKWA_HOME=/usr/local:CHUKWA_HOME=%{_prefix}:' | \
-sed 's:CHUKWA_CONF_DIR=/usr/local/chukwa/conf:CHUKWA_CONF_DIR=%{_conf_dir}:' > %{buildroot}/etc/init.d/chukwa-agent
-cat %{buildroot}%{_prefix}/tools/init.d/chukwa-system-metrics | \
-sed 's:- chukwa -c:- %{uid} -c:' | \
-sed 's:CHUKWA_HOME=/usr/local:CHUKWA_HOME=%{_prefix}:' | \
-sed 's:CHUKWA_CONF_DIR=/usr/local/chukwa/conf:CHUKWA_CONF_DIR=%{_conf_dir}:' > %{buildroot}/etc/init.d/chukwa-system-metrics
-chmod a+x %{buildroot}/etc/init.d/chukwa-*
-rm -rf %{buildroot}%{_prefix}/src
-rm -rf %{buildroot}%{_prefix}/build.xml
-%post
-mkdir -p %{_prefix}
-echo "Congratulation!  You have successfully installed Chukwa."
-echo ""
-echo "To collect Data             : /etc/init.d/chukwa-system-metrics start"
-echo "To run Chukwa Agent         : /etc/init.d/chukwa-agent start"
-echo "To run Chukwa Collector     : /etc/init.d/chukwa-collector start"
-echo "To run Chukwa Data Processor: /etc/init.d/chukwa-data-processors start"
-%preun
-/etc/init.d/chukwa-data-processors stop
-/etc/init.d/chukwa-system-metrics stop
-/etc/init.d/chukwa-agent stop
-/etc/init.d/chukwa-collector stop
-echo
-%postun
-%files
-%defattr(-,%{uid},%{gid})
-%{_prefix}
-%{_prefix}/*
-%defattr(-,root,root)
-/etc/init.d/chukwa-data-processors
-/etc/init.d/chukwa-collector
-/etc/init.d/chukwa-agent
-/etc/init.d/chukwa-system-metrics
-                </echo>
-		<rpm specFile="chukwa.spec" topDir="${build.dir}" cleanBuildDir="true" failOnError="true"/>
-        </target>
-
-	<!-- the normal classpath -->
-	<path id="classpath">
-		<pathelement location="${build.classes}"/>
-		<fileset dir="${lib.dir}">
-			<include name="**/*.jar" />
-			<exclude name="**/excluded/" />
-		</fileset>
-		<pathelement location="${conf.dir}"/>
-	</path>
-
-</project>

+ 0 - 18
src/contrib/chukwa/conf/README

@@ -1,18 +0,0 @@
-Check for an updated copy of this README at http://wiki.apache.org/hadoop/Chukwa_Configuration
-
-Chukwa comes with templates for all configuration files. These files are in the conf directory and are named <conf file name>.template. You should be able to just make a copy of each of these files, removing the ".template" suffix.
-
-'''Required configuration files'''
-
 * conf/collectors - newline-delimited list of collectors. This file is used by the startup and shutdown scripts to determine where to run Chukwa collectors, and is also used by Chukwa agents to find the collectors to send their data to (a minimal example follows this README).
- * conf/chukwa-agents - a list of hosts in the cluster on which to run the Chukwa agent daemon.
- * conf/chukwa-env.sh - environment variables required to run Chukwa.
 * conf/chukwa-collector-conf.xml - collector-specific settings (e.g. the port number to listen on for agents, which file system to store data in, etc.)
 * conf/chukwa-agent-conf.xml - agent-specific settings.
-
-'''Optional configuration files'''
-
- * conf/alert.conf - a list of e-mail addresses to which chukwa alerts should be sent.
- * conf/initial_adaptors - a list of adaptors to add to all agents at startup time.
-
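For reference, the two host-list files described in the README above hold one entry per line. The following is a minimal single-node sketch, assuming the defaults that ship with this contrib (the removed conf/collectors contains http://localhost:8080/ and the chukwa-agents template contains localhost); the URL and hostname are placeholders:

    # hypothetical single-node setup; adjust hostnames/ports for a real cluster
    echo "http://localhost:8080/" > conf/collectors      # one collector URL per line
    echo "localhost"              > conf/chukwa-agents   # one agent hostname per line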

+ 0 - 88
src/contrib/chukwa/conf/aggregator.sql

@@ -1,88 +0,0 @@
-#insert into [node_util] select starttime, avg(unused) as unused, avg(used) as used from (select DATE_FORMAT(m.LAUNCH_TIME,'%Y-%m-%d %H:%i:%s') as starttime,sum(AvgCPUBusy*j.NumOfMachines/(60*100)) as unused,sum((100-AvgCPUBusy)*j.NumOfMachines/(60*100)) as used from HodJobDigest d join HodJob j on (d.HodID = j.HodID) join MRJob m on (m.HodID = j.HodID) where m.LAUNCH_TIME >= '2008-09-12 21:11' and m.LAUNCH_TIME <= '2008-09-12 22:11' and d.Timestamp >= m.LAUNCH_TIME and d.Timestamp <= m.FINISH_TIME group by m.MRJobID order by m.LAUNCH_TIME) as t group by t.starttime 
-#insert into [jobtype_util] select CASE WHEN MRJobName like 'PigLatin%' THEN 'Pig' WHEN MRJobName like 'streamjob%' THEN 'Streaming' WHEN MRJobName like '%abacus%' THEN 'Abacus' ELSE 'Other' END as m, count(*)*j.NumOfMachines/60 as nodehours,count(distinct(MRJobID)) as jobs from HodJobDigest d join HodJob j on (d.HodID = j.HodID) join MRJob m on (m.HodID = j.HodID) where d.Timestamp >= '2008-09-12 21:11' and d.Timestamp <= '2008-09-12 22:11' and d.Timestamp >= m.LAUNCH_TIME and d.Timestamp <= m.FINISH_TIME group by CASE WHEN MRJobName like 'PigLatin%' THEN 'Pig' WHEN MRJobName like 'streamjob%' THEN 'Streaming' WHEN MRJobName like '%abacus%' THEN 'Abacus' ELSE 'Other' END order by CASE WHEN MRJobName like 'PigLatin%' THEN 'Pig' WHEN MRJobName like 'streamjob%' THEN 'Streaming' WHEN MRJobName like '%abacus%' THEN 'Abacus' ELSE 'Other' END
-#insert into [a] select d.Timestamp as starttime,((AvgCPUBusy * j.NumOfMachines) / (sum(j.NumOfMachines) * 1)) as used from Digest d join HodJob j on (d.HodID = j.HodID) where d.Timestamp >= '[past_10_minutes]' and d.Timestamp <= '[now]' group by d.Timestamp order by d.Timestamp 
-#insert into [b] select m, sum(foo.nodehours) as nodehours from (select m.MRJobID, round(avg(if(AvgCPUBusy is null,0,AvgCPUBusy)),0) as m, count(*)*j.NumOfMachines/60 as nodehours from HodJobDigest d join HodJob j on (d.HodID = j.HodID) join MRJob m on (m.HodID = j.HodID) where d.Timestamp >= '[past_10_minutes]' and d.Timestamp <= '[now]' and d.Timestamp >= m.LAUNCH_TIME and d.Timestamp <= m.FINISH_TIME group by m.MRJobID) as foo group by m; 
-#insert into [c] select if(AvgCPUBusy is null,0,AvgCPUBusy) as m, CASE WHEN MRJobName like 'PigLatin%' THEN 'Pig' WHEN MRJobName like 'streamjob%' THEN 'Streaming' WHEN MRJobName like '%abacus%' THEN 'Abacus' ELSE 'Other' END as interface, count(*)*j.NumOfMachines/60 as nodehours,count(distinct(MRJobID)) as jobs from HodJobDigest d join HodJob j on (d.HodID = j.HodID) join MRJob m on (m.HodID = j.HodID) where d.Timestamp >= '[past_10_minutes]' and d.Timestamp <= '[now]' and d.Timestamp >= m.LAUNCH_TIME and d.Timestamp <= m.FINISH_TIME group by AvgCPUBusy,CASE WHEN MRJobName like 'PigLatin%' THEN 'Pig' WHEN MRJobName like 'streamjob%' THEN 'Streaming' WHEN MRJobName like '%abacus%' THEN 'Abacus' ELSE 'Other' END order by if(AvgCPUBusy is null,0,AvgCPUBusy)
-#insert into [cluster_hadoop_mapred] (select timestamp,[avg(hadoop_mapred_job)] from [hadoop_mapred_job] where timestamp between '[past_10_minutes]' and '[now]' group by timestamp);
-replace into [cluster_system_metrics] (select timestamp,[avg(system_metrics)] from [system_metrics] where timestamp between '[past_10_minutes]' and '[past_5_minutes]' group by timestamp);
-replace into [dfs_throughput] (select timestamp,[avg(dfs_datanode)] from [dfs_datanode] where timestamp between '[past_10_minutes]' and '[past_5_minutes]' group by timestamp);
-replace into [cluster_disk] (select a.timestamp,a.mount,a.used,a.available,a.used_percent from (select from_unixtime(unix_timestamp(timestamp)-unix_timestamp(timestamp)%60)as timestamp,mount,avg(used) as used,avg(available) as available,avg(used_percent) as used_percent from [disk] where timestamp between '[past_10_minutes]' and '[past_5_minutes]' group by timestamp,mount) as a group by a.timestamp, a.mount);
-replace delayed into [hod_job_digest] (select timestamp,d.hodid,d.userid,[avg(system_metrics)] from (select a.HodID,b.host as machine,a.userid,a.starttime,a.endtime from [HodJob] a join [hod_machine] b on (a.HodID = b.HodID) where endtime between '[past_10_minutes]' and '[past_5_minutes]') as d,[system_metrics] where timestamp between d.starttime and d.endtime and host=d.machine group by hodid,timestamp);
-replace into [cluster_hadoop_rpc] (select timestamp,[avg(hadoop_rpc)] from [hadoop_rpc] where timestamp between '[past_10_minutes]' and '[past_5_minutes]' group by timestamp);
-replace into [user_util] (select timestamp, j.UserID as user, sum(j.NumOfMachines) as node_total, sum(cpu_idle_pcnt*j.NumOfMachines) as cpu_unused, sum((cpu_user_pcnt+cpu_system_pcnt)*j.NumOfMachines) as cpu_used, avg(cpu_user_pcnt+cpu_system_pcnt) as cpu_used_pcnt, sum((100-(sda_busy_pcnt+sdb_busy_pcnt+sdc_busy_pcnt+sdd_busy_pcnt)/4)*j.NumOfMachines) as disk_unused, sum(((sda_busy_pcnt+sdb_busy_pcnt+sdc_busy_pcnt+sdd_busy_pcnt)/4)*j.NumOfMachines) as disk_used, avg(((sda_busy_pcnt+sdb_busy_pcnt+sdc_busy_pcnt+sdd_busy_pcnt)/4)) as disk_used_pcnt, sum((100-eth0_busy_pcnt)*j.NumOfMachines) as network_unused, sum(eth0_busy_pcnt*j.NumOfMachines) as network_used, avg(eth0_busy_pcnt) as network_used_pcnt, sum((100-mem_used_pcnt)*j.NumOfMachines) as memory_unused, sum(mem_used_pcnt*j.NumOfMachines) as memory_used, avg(mem_used_pcnt) as memory_used_pcnt from [hod_job_digest] d,[HodJob] j where (d.HodID = j.HodID) and Timestamp between '[past_10_minutes]' and '[past_5_minutes]' group by j.UserID);
-#
-# Down sample metrics for charts
-replace into [system_metrics_month] (select timestamp,[group_avg(system_metrics)] from [system_metrics_week] where timestamp between '[past_15_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/300),host);
-replace into [system_metrics_quarter] (select timestamp,[group_avg(system_metrics)] from [system_metrics_month] where timestamp between '[past_90_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/1800),host);
-replace into [system_metrics_year] (select timestamp,[group_avg(system_metrics)] from [system_metrics_quarter] where timestamp between '[past_540_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/10800),host);
-replace into [system_metrics_decade] (select timestamp,[group_avg(system_metrics)] from [system_metrics_year] where timestamp between '[past_2160_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/43200),host);
-#
-replace into [dfs_namenode_month] (select timestamp,[group_avg(dfs_namenode)] from [dfs_namenode_week] where timestamp between '[past_15_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/300),host);
-replace into [dfs_namenode_quarter] (select timestamp,[group_avg(dfs_namenode)] from [dfs_namenode_month] where timestamp between '[past_90_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/1800),host);
-replace into [dfs_namenode_year] (select timestamp,[group_avg(dfs_namenode)] from [dfs_namenode_quarter] where timestamp between '[past_540_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/10800),host);
-replace into [dfs_namenode_decade] (select timestamp,[group_avg(dfs_namenode)] from [dfs_namenode_year] where timestamp between '[past_2160_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/43200),host);
-#
-replace into [dfs_datanode_month] (select timestamp,[group_avg(dfs_datanode)] from [dfs_datanode_week] where timestamp between '[past_15_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/300),host);
-replace into [dfs_datanode_quarter] (select timestamp,[group_avg(dfs_datanode)] from [dfs_datanode_month] where timestamp between '[past_90_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/1800),host);
-replace into [dfs_datanode_year] (select timestamp,[group_avg(dfs_datanode)] from [dfs_datanode_quarter] where timestamp between '[past_540_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/10800),host);
-replace into [dfs_datanode_decade] (select timestamp,[group_avg(dfs_datanode)] from [dfs_datanode_year] where timestamp between '[past_2160_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/43200),host);
-#
-replace into [hadoop_rpc_month] (select timestamp,[group_avg(hadoop_rpc)] from [hadoop_rpc_week] where timestamp between '[past_15_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/300),host);
-replace into [hadoop_rpc_quarter] (select timestamp,[group_avg(hadoop_rpc)] from [hadoop_rpc_month] where timestamp between '[past_90_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/1800),host);
-replace into [hadoop_rpc_year] (select timestamp,[group_avg(hadoop_rpc)] from [hadoop_rpc_quarter] where timestamp between '[past_540_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/10800),host);
-replace into [hadoop_rpc_decade] (select timestamp,[group_avg(hadoop_rpc)] from [hadoop_rpc_year] where timestamp between '[past_2160_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/43200),host);
-#
-replace into [cluster_hadoop_rpc_month] (select timestamp,[avg(cluster_hadoop_rpc)] from [cluster_hadoop_rpc_week] where timestamp between '[past_15_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/300));
-replace into [cluster_hadoop_rpc_quarter] (select timestamp,[avg(cluster_hadoop_rpc)] from [cluster_hadoop_rpc_month] where timestamp between '[past_90_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/1800));
-replace into [cluster_hadoop_rpc_year] (select timestamp,[avg(cluster_hadoop_rpc)] from [cluster_hadoop_rpc_quarter] where timestamp between '[past_540_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/10800));
-replace into [cluster_hadoop_rpc_decade] (select timestamp,[avg(cluster_hadoop_rpc)] from [cluster_hadoop_rpc_year] where timestamp between '[past_2160_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/43200));
-#
-replace into [hadoop_mapred_month] (select timestamp,[group_avg(hadoop_mapred)] from [hadoop_mapred_week] where timestamp between '[past_15_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/300),host);
-replace into [hadoop_mapred_quarter] (select timestamp,[group_avg(hadoop_mapred)] from [hadoop_mapred_month] where timestamp between '[past_90_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/1800),host);
-replace into [hadoop_mapred_year] (select timestamp,[group_avg(hadoop_mapred)] from [hadoop_mapred_quarter] where timestamp between '[past_540_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/10800),host);
-replace into [hadoop_mapred_decade] (select timestamp,[group_avg(hadoop_mapred)] from [hadoop_mapred_year] where timestamp between '[past_2160_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/43200),host);
-#
-replace into [hadoop_jvm_month] (select timestamp,[group_avg(hadoop_jvm)] from [hadoop_jvm_week] where timestamp between '[past_15_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/300),host,process_name);
-replace into [hadoop_jvm_quarter] (select timestamp,[group_avg(hadoop_jvm)] from [hadoop_jvm_month] where timestamp between '[past_90_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/1800),host,process_name);
-replace into [hadoop_jvm_year] (select timestamp,[group_avg(hadoop_jvm)] from [hadoop_jvm_quarter] where timestamp between '[past_540_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/10800),host,process_name);
-replace into [hadoop_jvm_decade] (select timestamp,[group_avg(hadoop_jvm)] from [hadoop_jvm_year] where timestamp between '[past_2160_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/43200),host,process_name);
-#
-replace into [dfs_throughput_month] (select timestamp,[avg(dfs_throughput)] from [dfs_throughput_week] where timestamp between '[past_15_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/300));
-replace into [dfs_throughput_quarter] (select timestamp,[avg(dfs_throughput)] from [dfs_throughput_month] where timestamp between '[past_90_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/1800));
-replace into [dfs_throughput_year] (select timestamp,[avg(dfs_throughput)] from [dfs_throughput_quarter] where timestamp between '[past_540_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/10800));
-replace into [dfs_throughput_decade] (select timestamp,[avg(dfs_throughput)] from [dfs_throughput_year] where timestamp between '[past_2160_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/43200));
-#
-replace into [node_activity_month] (select timestamp,[avg(node_activity)] from [node_activity_week] where timestamp between '[past_15_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/300));
-replace into [node_activity_quarter] (select timestamp,[avg(node_activity)] from [node_activity_month] where timestamp between '[past_90_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/1800));
-replace into [node_activity_year] (select timestamp,[avg(node_activity)] from [node_activity_quarter] where timestamp between '[past_540_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/10800));
-replace into [node_activity_decade] (select timestamp,[avg(node_activity)] from [node_activity_year] where timestamp between '[past_2160_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/43200));
-#
-replace into [dfs_fsnamesystem_month] (select timestamp,[group_avg(dfs_fsnamesystem)] from [dfs_fsnamesystem_week] where timestamp between '[past_15_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/300),host);
-replace into [dfs_fsnamesystem_quarter] (select timestamp,[group_avg(dfs_fsnamesystem)] from [dfs_fsnamesystem_month] where timestamp between '[past_90_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/1800),host);
-replace into [dfs_fsnamesystem_year] (select timestamp,[group_avg(dfs_fsnamesystem)] from [dfs_fsnamesystem_quarter] where timestamp between '[past_540_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/10800),host);
-replace into [dfs_fsnamesystem_decade] (select timestamp,[group_avg(dfs_fsnamesystem)] from [dfs_fsnamesystem_year] where timestamp between '[past_2160_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/43200),host);
-#
-replace into [disk_month] (select timestamp,[group_avg(disk)] from [disk_week] where timestamp between '[past_15_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/300),host,mount);
-replace into [disk_quarter] (select timestamp,[group_avg(disk)] from [disk_month] where timestamp between '[past_90_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/1800),host,mount);
-replace into [disk_year] (select timestamp,[group_avg(disk)] from [disk_quarter] where timestamp between '[past_540_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/10800),host,mount);
-replace into [disk_decade] (select timestamp,[group_avg(disk)] from [disk_year] where timestamp between '[past_2160_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/43200),host,mount);
-#
-replace into [cluster_disk_month] (select timestamp,[group_avg(cluster_disk)] from [cluster_disk_week] where timestamp between '[past_15_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/300),mount);
-replace into [cluster_disk_quarter] (select timestamp,[group_avg(cluster_disk)] from [cluster_disk_month] where timestamp between '[past_90_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/1800),mount);
-replace into [cluster_disk_year] (select timestamp,[group_avg(cluster_disk)] from [cluster_disk_quarter] where timestamp between '[past_540_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/10800),mount);
-replace into [cluster_disk_decade] (select timestamp,[group_avg(cluster_disk)] from [cluster_disk_year] where timestamp between '[past_2160_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/43200),mount);
-#
-replace into [cluster_system_metrics_month] (select timestamp,[avg(cluster_system_metrics)] from [cluster_system_metrics_week] where timestamp between '[past_15_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/300));
-replace into [cluster_system_metrics_quarter] (select timestamp,[avg(cluster_system_metrics)] from [cluster_system_metrics_month] where timestamp between '[past_90_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/1800));
-replace into [cluster_system_metrics_year] (select timestamp,[avg(cluster_system_metrics)] from [cluster_system_metrics_quarter] where timestamp between '[past_540_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/10800));
-replace into [cluster_system_metrics_decade] (select timestamp,[avg(cluster_system_metrics)] from [cluster_system_metrics_year] where timestamp between '[past_2160_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/43200));
-#
-replace into [hod_job_digest_month] (select timestamp,[group_avg(hod_job_digest)] from [hod_job_digest_week] where timestamp between '[past_15_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/300),HodID);
-replace into [hod_job_digest_quarter] (select timestamp,[group_avg(hod_job_digest)] from [hod_job_digest_month] where timestamp between '[past_90_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/1800),HodID);
-replace into [hod_job_digest_year] (select timestamp,[group_avg(hod_job_digest)] from [hod_job_digest_quarter] where timestamp between '[past_540_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/10800),HodID);
-replace into [hod_job_digest_decade] (select timestamp,[group_avg(hod_job_digest)] from [hod_job_digest_year] where timestamp between '[past_2160_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/43200),HodID);
-#
-replace into [user_util_month] (select timestamp,[group_avg(user_util)] from [user_util_week] where timestamp between '[past_15_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/300),user);
-replace into [user_util_quarter] (select timestamp,[group_avg(user_util)] from [user_util_month] where timestamp between '[past_90_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/1800),user);
-replace into [user_util_year] (select timestamp,[group_avg(user_util)] from [user_util_quarter] where timestamp between '[past_540_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/10800),user);
-replace into [user_util_decade] (select timestamp,[group_avg(user_util)] from [user_util_year] where timestamp between '[past_2160_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/43200),user);

+ 0 - 1
src/contrib/chukwa/conf/alert.conf.template

@@ -1 +0,0 @@
-user@example.com

+ 0 - 27
src/contrib/chukwa/conf/chukwa-agent-conf.xml

@@ -1,27 +0,0 @@
-<?xml version="1.0"?>
-<?xml-stylesheet type="text/xsl" href="nutch-conf.xsl"?>
-
-<!-- Put site-specific property overrides in this file. -->
-
-<configuration>
-  
-  <property>
-    <name>chukwaAgent.control.port</name>
-    <value>9093</value>
-    <description>The socket port number the agent's control interface can be contacted at.</description>
-  </property>
-
-  <property>
-    <name>chukwaAgent.hostname</name>
-    <value>localhost</value>
-    <description>The hostname of the agent on this node. Usually localhost, this is used by the chukwa instrumentation agent-control interface library</description>
-  </property>
-
-  <property>
-    <name>chukwaAgent.tags</name>
-    <value>cluster="demo"</value>
-    <description>The cluster's name for this agent</description>
-  </property>
-
-
-</configuration>

+ 0 - 62
src/contrib/chukwa/conf/chukwa-agent-conf.xml.template

@@ -1,62 +0,0 @@
-<?xml version="1.0"?>
-<?xml-stylesheet type="text/xsl" href="nutch-conf.xsl"?>
-
-<!-- Put site-specific property overrides in this file. -->
-
-<configuration>
-  
-  
-  <property>
-    <name>chukwaAgent.tags</name>
-    <value>cluster="demo"</value>
-    <description>The cluster's name for this agent</description>
-  </property>
-  
-  <property>
-    <name>chukwaAgent.control.port</name>
-    <value>9093</value>
-    <description>The socket port number the agent's control interface can be contacted at.</description>
-  </property>
-
-  <property>
-    <name>chukwaAgent.hostname</name>
-    <value>localhost</value>
-    <description>The hostname of the agent on this node. Usually localhost, this is used by the chukwa instrumentation agent-control interface library</description>
-  </property>
-
-  <property>
-    <name>chukwaAgent.checkpoint.name</name>
-    <value>chukwa_agent_checkpoint</value>
-    <description>the prefix to prepend to the agent's checkpoint file(s)</description>
-  </property>
-  
-  <property>
-    <name>chukwaAgent.checkpoint.dir</name>
-    <value>${CHUKWA_HOME}/var/tmp/</value>
-    <description>the location to put the agent's checkpoint file(s)</description>
-  </property>
-
-  <property>
-    <name>chukwaAgent.checkpoint.interval</name>
-    <value>5000</value>
-    <description>the frequency interval for the agent to do checkpoints, in milliseconds</description>
-  </property>
-
-  <property>
-    <name>chukwaAgent.sender.fastRetries</name>
-    <value>4</value>
-    <description>the number of post attempts to make to a single collector, before marking it failed</description>
-  </property>
-
-  <property>
-    <name>chukwaAgent.collector.retries</name>
-    <value>144000</value>
-    <description>the number of attempts to find a working collector</description>
-  </property>
-
-  <property>
-    <name>chukwaAgent.collector.retryInterval</name>
-    <value>20000</value>
-    <description>the number of milliseconds to wait between searches for a collector</description>
-  </property>
-</configuration>

+ 0 - 1
src/contrib/chukwa/conf/chukwa-agents.template

@@ -1 +0,0 @@
-localhost

+ 0 - 32
src/contrib/chukwa/conf/chukwa-collector-conf.xml

@@ -1,32 +0,0 @@
-<?xml version="1.0"?>
-<?xml-stylesheet type="text/xsl" href="nutch-conf.xsl"?>
-
-<!-- Put site-specific property overrides in this file. -->
-
-<configuration>
-
-  <property>
-    <name>writer.hdfs.filesystem</name>
-    <value>hdfs://localhost:9000/</value>
-    <description>HDFS to dump to</description>
-  </property>
-  
-  <property>
-    <name>chukwaCollector.outputDir</name>
-    <value>/chukwa/logs/</value>
-    <description>Chukwa data sink directory</description>
-  </property>
-
-  <property>
-    <name>chukwaCollector.rotateInterval</name>
-    <value>300000</value>
-    <description>Chukwa rotate interval (ms)</description>
-  </property>
-
-  <property>
-    <name>chukwaCollector.http.port</name>
-    <value>8080</value>
-    <description>The HTTP port number the collector will listen on</description>
-  </property>
-
-</configuration>

+ 0 - 32
src/contrib/chukwa/conf/chukwa-collector-conf.xml.template

@@ -1,32 +0,0 @@
-<?xml version="1.0"?>
-<?xml-stylesheet type="text/xsl" href="nutch-conf.xsl"?>
-
-<!-- Put site-specific property overrides in this file. -->
-
-<configuration>
-
-  <property>
-    <name>writer.hdfs.filesystem</name>
-    <value>hdfs://localhost:9000/</value>
-    <description>HDFS to dump to</description>
-  </property>
-  
-  <property>
-    <name>chukwaCollector.outputDir</name>
-    <value>/chukwa/logs/</value>
-    <description>Chukwa data sink directory</description>
-  </property>
-
-  <property>
-    <name>chukwaCollector.rotateInterval</name>
-    <value>300000</value>
-    <description>Chukwa rotate interval (ms)</description>
-  </property>
-
-  <property>
-    <name>chukwaCollector.http.port</name>
-    <value>8080</value>
-    <description>The HTTP port number the collector will listen on</description>
-  </property>
-
-</configuration>

+ 0 - 107
src/contrib/chukwa/conf/chukwa-demux-conf.xml

@@ -1,107 +0,0 @@
-<?xml version="1.0"?>
-<?xml-stylesheet type="text/xsl" href="nutch-conf.xsl"?>
-
-<!-- Put site-specific property overrides in this file. -->
-
-<configuration>
-
-
-  <property>
-    <name>chukwaArchiveBuilder.reduceCount</name>
-    <value>5</value>
-    <description>Reduce count </description>
-  </property>
-
-  
-  <property>
-    <name>SysLog</name>
-    <value>org.apache.hadoop.chukwa.extraction.demux.processor.mapper.SysLog</value>
-    <description>Parser class for the SysLog data type</description>
-  </property>
-
-  <property>
-    <name>Df</name>
-    <value>org.apache.hadoop.chukwa.extraction.demux.processor.mapper.Df</value>
-    <description>Parser class for the Df data type</description>
-  </property>
-
-  <property>
-    <name>HadoopLog</name>
-    <value>org.apache.hadoop.chukwa.extraction.demux.processor.mapper.HadoopLogProcessor</value>
-    <description>Parser class for the HadoopLog data type</description>
-  </property>
-
-  <property>
-    <name>HadoopMetricsProcessor</name>
-    <value>org.apache.hadoop.chukwa.extraction.demux.processor.mapper.HadoopMetricsProcessor</value>
-    <description>Parser class for the HadoopMetricsProcessor data type</description>
-  </property>
-
-  <property>
-    <name>Iostat</name>
-    <value>org.apache.hadoop.chukwa.extraction.demux.processor.mapper.Iostat</value>
-    <description>Parser class for the Iostat data type</description>
-  </property>
- 
-   <property>
-    <name>Log4jJobHistoryProcessor</name>
-    <value>org.apache.hadoop.chukwa.extraction.demux.processor.mapper.Log4jJobHistoryProcessor</value>
-    <description>Parser class for the Log4jJobHistoryProcessor data type</description>
-  </property>
-
-   <property>
-    <name>JobLogHistoryProcessor</name>
-    <value>org.apache.hadoop.chukwa.extraction.demux.processor.mapper.JobLogHistoryProcessor</value>
-    <description>Parser class for the JobLogHistoryProcessor data type</description>
-  </property>
-
- 
-   <property>
-    <name>PbsNodes</name>
-    <value>org.apache.hadoop.chukwa.extraction.demux.processor.mapper.PbsNodes</value>
-    <description>Parser class for the PbsNodes data type</description>
-  </property>
- 
-   <property>
-    <name>Sar</name>
-    <value>org.apache.hadoop.chukwa.extraction.demux.processor.mapper.Sar</value>
-    <description>Parser class for the Sar data type</description>
-  </property>
-
-   <property>
-    <name>TsProcessor</name>
-    <value>org.apache.hadoop.chukwa.extraction.demux.processor.mapper.TsProcessor</value>
-    <description>Parser class for the TsProcessor data type</description>
-   </property>
-  
-   <property>
-    <name>Top</name>
-    <value>org.apache.hadoop.chukwa.extraction.demux.processor.mapper.Top</value>
-    <description>Parser class for the Top data type</description>
-   </property>
-
-   <property>
-    <name>Torque</name>
-    <value>org.apache.hadoop.chukwa.extraction.demux.processor.mapper.Torque</value>
-    <description>Parser class for Parsing qstat and tracejob</description>
-   </property>
-  
-   <property>
-    <name>YWatch</name>
-    <value>org.apache.hadoop.chukwa.extraction.demux.processor.mapper.YWatch</value>
-    <description>Parser class for the YWatch data type</description>
-   </property>
-  
-   <property>
-    <name>DbLoader</name>
-    <value>org.apache.hadoop.chukwa.extraction.demux.processor.mapper.TsProcessor</value>
-    <description>Parser class for the DbLoader data type</description>
-   </property>
-
-   <property>
-    <name>JobConf</name>
-    <value>org.apache.hadoop.chukwa.extraction.demux.processor.mapper.JobConfProcessor</value>
-    <description>Parser class for Map reduce Job Configuration</description>
-   </property>
-    
-</configuration>

+ 0 - 41
src/contrib/chukwa/conf/chukwa-env.sh

@@ -1,41 +0,0 @@
-# Set Chukwa-specific environment variables here.
-
-# The only required environment variable is JAVA_HOME.  All others are
-# optional.  When running a distributed configuration it is best to
-# set JAVA_HOME in this file, so that it is correctly defined on
-# remote nodes.
-
-# The java implementation to use.  Required.
-export JAVA_HOME=/usr/lib/j2sdk1.5-sun
-
-# The location of the Hadoop installation the collector should use. The default
-# assumes that this chukwa checkout lives in hadoop's src/contrib directory.
-export HADOOP_HOME="/usr/lib/hadoop/current"
-export HADOOP_CONF_DIR="${HADOOP_HOME}/conf/"
-
-# The directory where pid files are stored. CHUKWA_HOME/var/run by default.
-#export CHUKWA_PID_DIR=
-
-# The location of chukwa logs, defaults to CHUKWA_HOME/logs
-export CHUKWA_LOG_DIR=${CHUKWA_HOME}/logs
-
-# The location of the hadoop jars. Use this if you are running a collector
-# without a running HDFS (i.e. one that writes sequence files to local disk).
-# If this is not set, the default is to check HADOOP_HOME for jars or
-# classes; if those are not found, the hadoop jars which come with chukwa are used.
-export HADOOP_JAR=`ls ${HADOOP_HOME}/hadoop-*-core.jar`
-
-# The location of the chukwa data repository
-export chuwaRecordsRepository="/chukwa/repos/"
-
-# The location of the torque pbsnodes command
-export nodeActivityCmde="/usr/lib/torque/current/bin/pbsnodes "
-
-# The server which contains pbsnodes, qstat and tracejob.
-export TORQUE_SERVER=localhost
-
-# The location containing the torque binaries.
-export TORQUE_HOME=/usr/lib/torque
-
-# The domain of the cluster
-#export DOMAIN=

+ 0 - 66
src/contrib/chukwa/conf/chukwa-env.sh.template

@@ -1,66 +0,0 @@
-# Set Chukwa-specific environment variables here.
-
-# The only required environment variable is JAVA_HOME.  All others are
-# optional.  When running a distributed configuration it is best to
-# set JAVA_HOME in this file, so that it is correctly defined on
-# remote nodes.
-
-# The java implementation to use.  Required.
-#export JAVA_HOME=/usr/lib/j2sdk1.5-sun
-
-# Optional (i.e. will try sensible defaults)
-# The location of the Hadoop installation the collector should use. Chukwa uses this to
-# find the hadoop classes or jars as well as the hadoop executables
-# for running the mapreduce demux job. Chukwa will look first in
-# HADOOP_HOME/build for a hadoop-*-core.jar file,
-# then in HADOOP_HOME/build/classes for hadoop classes.
-# If you want to use a hadoop jar straight up, without
-# a HADOOP_HOME/bin directory, you should set HADOOP_JAR (below), but
-# then chukwa mapreduce jobs won't work. If HADOOP_HOME is not set, the default
-# assumes that this chukwa checkout lives in hadoop's src/contrib directory, but
-# if jars or classes cannot be found using that default assumption,
-# Chukwa will fall back to the hadoop jars that come with it
-# in the hadoopjars directory.
-#export HADOOP_HOME="/home/user/Development/hadoop-trunk"
-
-# Optional (i.e. will try sensible defaults)
-# The location of the hadoop jars. Setting this will override the search for
-# hadoop jars described above (using HADOOP_HOME). If HADOOP_JAR is not set,
-# the default is to check HADOOP_HOME/build (above) for
-# jars or classes; if those are not found, the hadoop jars which
-# come with chukwa in $CHUKWA_HOME/hadoopjars are used. YOU SHOULD ONLY NEED TO
-# USE THIS IF YOU ARE RUNNING A COLLECTOR WITHOUT A RUNNING
-# HDFS! (i.e. writing datasink sequence files to local disk). Be careful
-# if you use HDFS for chukwa storage but haven't built the
-# hadoop classes or jar: chukwa will use the default hadoop jars
-# that come with chukwa, and you can easily suffer
-# errors due to a protocol mismatch between hadoop versions.
-# (A sketch of this jar lookup order follows this file.)
-#export HADOOP_JAR=${HADOOP_HOME}/build/hadoop-*-core.jar
-
-# The location of the chukwa data repository (in either HDFS or your local
-# file system, whichever you are using)
-export chukwaRecordsRepository="/chukwa/repos/"
-
-# The directory where pid files are stored. CHUKWA_HOME/var/run by default.
-#export CHUKWA_PID_DIR="/tmp/chukwa-pid-dir"
-
-# The location of chukwa logs, defaults to CHUKWA_HOME/logs
-#export CHUKWA_LOG_DIR="/tmp/chukwa-log-dir"
-
-# The location of the torque pbsnodes command
-#export nodeActivityCmde="
-
-# The server which contains pbsnodes, qstat and tracejob.
-#export TORQUE_SERVER=localhost
-
-# The location containing the torque binaries.
-#export TORQUE_HOME=/usr/local/torque
-
-# Instance name for chukwa deployment
-export CHUKWA_IDENT_STRING=demo
-
-# Database driver name for storing Chukwa data.
-# export JDBC_DRIVER=
-
-# Database URL prefix for Database Loader.
-# export JDBC_URL_PREFIX=jdbc://
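To make the HADOOP_HOME/HADOOP_JAR comments above concrete, here is a minimal sketch of the jar lookup order they describe. It is an illustration only: it assumes the real resolution lives in bin/chukwa-config.sh, ignores the HADOOP_HOME/build/classes fallback, and may differ from what the actual scripts do.

    # hypothetical illustration of the lookup order described in the comments above
    if [ -n "$HADOOP_JAR" ]; then
      hadoop_jar="$HADOOP_JAR"                                       # explicit override wins
    elif ls "$HADOOP_HOME"/build/hadoop-*-core.jar >/dev/null 2>&1; then
      hadoop_jar=$(ls "$HADOOP_HOME"/build/hadoop-*-core.jar)        # jar built from a hadoop checkout
    else
      hadoop_jar=$(ls "$CHUKWA_HOME"/hadoopjars/hadoop-*-core.jar)   # jars bundled with chukwa
    fi
    echo "using hadoop jar: $hadoop_jar"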

+ 0 - 31
src/contrib/chukwa/conf/chukwa-hadoop-metrics-log4j.properties

@@ -1,31 +0,0 @@
-log4j.appender.chukwa.rpc.recordType=HadoopMetricsProcessor
-log4j.appender.chukwa.rpc.chukwaClientHostname=localhost
-log4j.appender.chukwa.rpc.chukwaClientPortNum=9093
-log4j.appender.chukwa.rpc.DatePattern=.yyyy-MM-dd
-log4j.appender.chukwa.rpc.layout=org.apache.log4j.PatternLayout
-log4j.appender.chukwa.rpc.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
-log4j.appender.chukwa.rpc.Dir=/tmp
-
-log4j.appender.chukwa.jvm.recordType=HadoopMetricsProcessor
-log4j.appender.chukwa.jvm.chukwaClientHostname=localhost
-log4j.appender.chukwa.jvm.chukwaClientPortNum=9093
-log4j.appender.chukwa.jvm.DatePattern=.yyyy-MM-dd
-log4j.appender.chukwa.jvm.layout=org.apache.log4j.PatternLayout
-log4j.appender.chukwa.jvm.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
-log4j.appender.chukwa.jvm.Dir=/tmp
-
-log4j.appender.chukwa.dfs.recordType=HadoopMetricsProcessor
-log4j.appender.chukwa.dfs.chukwaClientHostname=localhost
-log4j.appender.chukwa.dfs.chukwaClientPortNum=9093
-log4j.appender.chukwa.dfs.DatePattern=.yyyy-MM-dd
-log4j.appender.chukwa.dfs.layout=org.apache.log4j.PatternLayout
-log4j.appender.chukwa.dfs.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
-log4j.appender.chukwa.dfs.Dir=/tmp
-
-log4j.appender.chukwa.mapred.recordType=HadoopMetricsProcessor
-log4j.appender.chukwa.mapred.chukwaClientHostname=localhost
-log4j.appender.chukwa.mapred.chukwaClientPortNum=9093
-log4j.appender.chukwa.mapred.DatePattern=.yyyy-MM-dd
-log4j.appender.chukwa.mapred.layout=org.apache.log4j.PatternLayout
-log4j.appender.chukwa.mapred.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
-log4j.appender.chukwa.mapred.Dir=/tmp

+ 0 - 19
src/contrib/chukwa/conf/chukwa-log4j.properties

@@ -1,19 +0,0 @@
-log4j.rootLogger=INFO, R 
-log4j.appender.R=org.apache.log4j.RollingFileAppender
-log4j.appender.R.File=${CHUKWA_LOG_DIR}/${APP}.log
-log4j.appender.R.MaxFileSize=10MB
-log4j.appender.R.MaxBackupIndex=10
-log4j.appender.R.layout=org.apache.log4j.PatternLayout
-log4j.appender.R.layout.ConversionPattern=%d{ISO8601} %p %t %c{1} - %m%n
-
-#
-# console
-# Add "console" to rootlogger above if you want to use this 
-#
-
-log4j.appender.console=org.apache.log4j.ConsoleAppender
-log4j.appender.console.target=System.err
-log4j.appender.console.layout=org.apache.log4j.PatternLayout
-log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n
-
-

+ 0 - 1
src/contrib/chukwa/conf/collectors

@@ -1 +0,0 @@
-http://localhost:8080/

+ 0 - 1
src/contrib/chukwa/conf/collectors.template

@@ -1 +0,0 @@
-localhost

+ 0 - 7
src/contrib/chukwa/conf/commons-logging.properties

@@ -1,7 +0,0 @@
-#Logging Implementation
-
-#Log4J
-org.apache.commons.logging.Log=org.apache.commons.logging.impl.Log4JLogger
-
-#JDK Logger
-#org.apache.commons.logging.Log=org.apache.commons.logging.impl.Jdk14Logger

+ 0 - 604
src/contrib/chukwa/conf/database_create_tables

@@ -1,604 +0,0 @@
-create table if not exists node_activity_template 
-(
-    timestamp  timestamp default CURRENT_TIMESTAMP,
-    used int(11) default NULL,
-    usedMachines text,
-    free int(11) default NULL,
-    freeMachines text,
-    down int(11) default NULL,
-    downMachines text,
-    primary key(timestamp),
-    index (Timestamp)
-);
-
-create table if not exists switch_data_template (
-    timestamp timestamp default CURRENT_TIMESTAMP,
-    host varchar(40),
-    port varchar(10),
-    poller varchar(40),
-    metricName varchar(20),
-    value double,
-    primary key(timestamp, host, port),
-    index (Timestamp)
-);
-
-create table if not exists system_metrics_template (
-    timestamp  timestamp default CURRENT_TIMESTAMP,
-    host varchar(40),
-    load_15 double, 
-    load_5 double,
-    load_1 double,
-    task_total double,
-    task_running double,
-    task_sleep double,
-    task_stopped double,
-    task_zombie double,
-    mem_total double,
-    mem_buffers double,
-    mem_cached double,
-    mem_used double,
-    mem_free double,
-    eth0_rxerrs double,
-    eth0_rxbyts double,
-    eth0_rxpcks double,
-    eth0_rxdrops double,
-    eth0_txerrs double,
-    eth0_txbyts double,
-    eth0_txpcks double,
-    eth0_txdrops double,
-    eth1_rxerrs double,
-    eth1_rxbyts double,
-    eth1_rxpcks double,
-    eth1_rxdrops double,
-    eth1_txerrs double,
-    eth1_txbyts double,
-    eth1_txpcks double,
-    eth1_txdrops double,
-    sda_rkbs double,
-    sda_wkbs double,
-    sdb_rkbs double,
-    sdb_wkbs double,
-    sdc_rkbs double,
-    sdc_wkbs double,
-    sdd_rkbs double,
-    sdd_wkbs double,
-    cpu_idle_pcnt float,
-    cpu_nice_pcnt float,
-    cpu_system_pcnt float,
-    cpu_user_pcnt float,
-    cpu_hirq_pcnt float,
-    cpu_sirq_pcnt float,
-    iowait_pcnt float,
-    mem_buffers_pcnt float,
-    mem_used_pcnt float,
-    eth0_busy_pcnt float,
-    eth1_busy_pcnt float,
-    sda_busy_pcnt float,
-    sdb_busy_pcnt float,
-    sdc_busy_pcnt float,
-    sdd_busy_pcnt float,
-    swap_used_pcnt float,
-    primary key(host, timestamp),
-    index (timestamp)
-);
-
-create table if not exists disk_template (
-    timestamp  timestamp default CURRENT_TIMESTAMP,
-    host varchar(40),
-    mount varchar(40),
-    used double,
-    available double,
-    used_percent double,
-    fs varchar(40),
-    primary key(timestamp,host,mount),
-    index (timestamp)
-);
-
-create table if not exists cluster_disk_template (
-    timestamp  timestamp default CURRENT_TIMESTAMP,
-    mount varchar(40),
-    used double,
-    available double,
-    used_percent double,
-    primary key(timestamp,mount),
-    index (timestamp)
-);
-
-create table if not exists cluster_system_metrics_template (
-    timestamp  timestamp default CURRENT_TIMESTAMP,
-    host int,
-    load_15 double, 
-    load_5 double,
-    load_1 double,
-    task_total double,
-    task_running double,
-    task_sleep double,
-    task_stopped double,
-    task_zombie double,
-    mem_total double,
-    mem_buffers double,
-    mem_cached double,
-    mem_used double,
-    mem_free double,
-    eth0_rxerrs double,
-    eth0_rxbyts double,
-    eth0_rxpcks double,
-    eth0_rxdrops double,
-    eth0_txerrs double,
-    eth0_txbyts double,
-    eth0_txpcks double,
-    eth0_txdrops double,
-    eth1_rxerrs double,
-    eth1_rxbyts double,
-    eth1_rxpcks double,
-    eth1_rxdrops double,
-    eth1_txerrs double,
-    eth1_txbyts double,
-    eth1_txpcks double,
-    eth1_txdrops double,
-    sda_rkbs double,
-    sda_wkbs double,
-    sdb_rkbs double,
-    sdb_wkbs double,
-    sdc_rkbs double,
-    sdc_wkbs double,
-    sdd_rkbs double,
-    sdd_wkbs double,
-    cpu_idle_pcnt float,
-    cpu_nice_pcnt float,
-    cpu_system_pcnt float,
-    cpu_user_pcnt float,
-    cpu_hirq_pcnt float,
-    cpu_sirq_pcnt float,
-    iowait_pcnt float,
-    mem_buffers_pcnt float,
-    mem_used_pcnt float,
-    eth0_busy_pcnt float,
-    eth1_busy_pcnt float,
-    sda_busy_pcnt float,
-    sdb_busy_pcnt float,
-    sdc_busy_pcnt float,
-    sdd_busy_pcnt float,
-    swap_used_pcnt float,
-    primary key(timestamp),
-    index (timestamp)
-);
-
-create table if not exists dfs_namenode_template (
-    timestamp timestamp default 0,
-    host varchar(80),
-    add_block_ops double,
-    blocks_corrupted double,
-    create_file_ops double,
-    delete_file_ops double,
-    files_created double,
-    files_renamed double,
-    files_deleted double,
-    get_block_locations double,
-    get_listing_ops double,
-    safe_mode_time double,
-    syncs_avg_time double,
-    syncs_num_ops double,
-    transactions_avg_time double,
-    transactions_num_ops double,
-    block_report_avg_time double,
-    block_report_num_ops double,
-    fs_image_load_time double,
-    primary key(timestamp, host),
-    index(timeStamp)
-);
-
-create table if not exists dfs_datanode_template (
-    timestamp timestamp default 0,
-    host varchar(80),
-    block_reports_avg_time double,
-    block_reports_num_ops double,
-    block_verification_failures double,
-    blocks_read double,
-    blocks_removed double,
-    blocks_replicated double,
-    blocks_verified double,
-    blocks_written double,
-    bytes_read double,
-    bytes_written double,
-    copy_block_op_avg_time double,
-    copy_block_op_num_ops double,
-    heart_beats_avg_time double,
-    heart_beats_num_ops double,
-    read_block_op_avg_time double,
-    read_block_op_num_ops double,
-    read_metadata_op_avg_time double,
-    read_metadata_op_num_ops double,
-    reads_from_local_client double,
-    reads_from_remote_client double,
-    replace_block_op_avg_time double,
-    replace_block_op_num_ops double,
-    session_id double,
-    write_block_op_avg_time double,
-    write_block_op_num_ops double,
-    writes_from_local_client double,
-    writes_from_remote_client double,
-    primary key(timestamp, host),
-    index(timestamp)
-);
-
-create table if not exists dfs_fsnamesystem_template (
-    timestamp timestamp default 0,
-    host VARCHAR(80),
-    blocks_total double,
-    capacity_remaining_gb double,
-    capacity_total_gb double,
-    capacity_used_gb double,
-    files_total double,
-    pending_replication_blocks double,
-    scheduled_replication_blocks double,
-    total_load double,
-    under_replicated_blocks double,
-    primary key(timestamp, host),
-    index(timestamp)
-);
-
-create table if not exists dfs_throughput_template (
-    timestamp timestamp default 0,
-    host int,
-    block_reports_avg_time double,
-    block_reports_num_ops double,
-    block_verification_failures double,
-    blocks_read double,
-    blocks_removed double,
-    blocks_replicated double,
-    blocks_verified double,
-    blocks_written double,
-    bytes_read double,
-    bytes_written double,
-    copy_block_op_avg_time double,
-    copy_block_op_num_ops double,
-    heart_beats_avg_time double,
-    heart_beats_num_ops double,
-    read_block_op_avg_time double,
-    read_block_op_num_ops double,
-    read_metadata_op_avg_time double,
-    read_metadata_op_num_ops double,
-    reads_from_local_client double,
-    reads_from_remote_client double,
-    replace_block_op_avg_time double,
-    replace_block_op_num_ops double,
-    session_id double,
-    write_block_op_avg_time double,
-    write_block_op_num_ops double,
-    writes_from_local_client double,
-    writes_from_remote_client double,
-    primary key(timestamp),
-    index(timestamp)
-);
-
-create table if not exists hadoop_jvm_template (
-    timestamp timestamp default 0,
-    host VARCHAR(80),
-    process_name VARCHAR(80),
-    gc_timemillis double,
-    gc_count double,
-    log_error double,
-    log_fatal double,
-    log_info double,
-    log_warn double,
-    mem_heap_committed_m double,
-    mem_heap_used_m double,
-    mem_non_heap_committed_m double,
-    mem_non_heap_used_m double,
-    threads_blocked double,
-    threads_new double,
-    threads_runnable double,
-    threads_terminated double,
-    threads_timed_waiting double,
-    threads_waiting double,
-    primary key (timestamp, host, process_name),
-    index(timestamp)
-);
-
-create table if not exists hadoop_mapred_template (
-    timestamp timestamp default 0,
-    host VARCHAR(80),
-    jobs_completed double,
-    jobs_submitted double,
-    maps_completed double,
-    maps_launched double,
-    reduces_completed double,
-    reduces_launched double,
-    primary key (timestamp, host),
-    index(timestamp)
-);
-
-create table if not exists hadoop_rpc_template (
-    timestamp timestamp default 0,
-    host VARCHAR(80),
-    rpc_processing_time_avg_time double,
-    rpc_processing_time_num_ops double,
-    rpc_queue_time_avg_time double,
-    rpc_queue_time_num_ops double,
-    get_build_version_avg_time double,
-    get_build_version_num_ops double,
-    get_job_counters_avg_time double,
-    get_job_counters_num_ops double,
-    get_job_profile_avg_time double,
-    get_job_profile_num_ops double,
-    get_job_status_avg_time double,
-    get_job_status_num_ops double,
-    get_new_job_id_avg_time double,
-    get_new_job_id_num_ops double,
-    get_protocol_version_avg_time double,
-    get_protocol_version_num_ops double,
-    get_system_dir_avg_time double,
-    get_system_dir_num_ops double,
-    get_task_completion_events_avg_time double,
-    get_task_completion_events_num_ops double,
-    get_task_diagnostics_avg_time double,
-    get_task_diagnostics_num_ops double,
-    heartbeat_avg_time double,
-    heartbeat_num_ops double,
-    killJob_avg_time double,
-    killJob_num_ops double,
-    submit_job_avg_time double,
-    submit_job_num_ops double,
-    primary key (timestamp, host),
-    index(timestamp)
-);
-
-create table if not exists cluster_hadoop_rpc_template (
-    timestamp timestamp default 0,
-    host int,
-    rpc_processing_time_avg_time double,
-    rpc_processing_time_num_ops double,
-    rpc_queue_time_avg_time double,
-    rpc_queue_time_num_ops double,
-    get_build_version_avg_time double,
-    get_build_version_num_ops double,
-    get_job_counters_avg_time double,
-    get_job_counters_num_ops double,
-    get_job_profile_avg_time double,
-    get_job_profile_num_ops double,
-    get_job_status_avg_time double,
-    get_job_status_num_ops double,
-    get_new_job_id_avg_time double,
-    get_new_job_id_num_ops double,
-    get_protocol_version_avg_time double,
-    get_protocol_version_num_ops double,
-    get_system_dir_avg_time double,
-    get_system_dir_num_ops double,
-    get_task_completion_events_avg_time double,
-    get_task_completion_events_num_ops double,
-    get_task_diagnostics_avg_time double,
-    get_task_diagnostics_num_ops double,
-    heartbeat_avg_time double,
-    heartbeat_num_ops double,
-    killJob_avg_time double,
-    killJob_num_ops double,
-    submit_job_avg_time double,
-    submit_job_num_ops double,
-    primary key (timestamp),
-    index(timestamp)
-);
-
-create table if not exists hadoop_rpc_calls_template (
-    timestamp timestamp default 0,
-    method varchar(80),
-    calls double,
-    primary key(timestamp, method),
-    index(timestamp)
-);
-
-create table if not exists mssrgraph_template (
-    timestamp timestamp default 0,
-    job_id VARCHAR(80),
-    type VARCHAR(10),
-    count double,
-    primary key (timestamp, job_id),
-    index(timestamp)
-);
-
-create table if not exists mr_job(
-    HodID varchar(20),
-    MRJobID varchar(80),
-    MRJobName varchar(100),
-    STATUS varchar(10),
-    SUBMIT_TIME timestamp default 0,
-    LAUNCH_TIME timestamp default 0,
-    FINISH_TIME timestamp default 0,
-    MAPPER_PHASE_END_TIME timestamp default 0,
-    TOTAL_MAPS int unsigned,
-    TOTAL_REDUCES int unsigned,
-    FINISHED_MAPS  int unsigned,
-    FINISHED_REDUCES  int unsigned,      
-    NumOfLocalSplits int unsigned,
-    NumOfRackLocalSplits int unsigned,
-    NUM_OF_MAPPER_ATTEMPTS int unsigned,
-    NUM_OF_REDUCER_ATTEMPTS int unsigned,
-    MAPPER_PHASE_EXECUTION_TIME int,
-    AvgMapperExecutionTime int unsigned,
-    AvgLocalMapperExecutionTime int unsigned,
-    AvgRackLocalMapperExecutionTime int unsigned,
-    AvgRemoteMapperExecutionTime int unsigned,
-    AvgReducerExecutionTime int unsigned,
-    AvgShuffleExecutionTime int unsigned,
-    AvgSortExecutionTime int unsigned,
-    MapperClass varchar(80),
-    ReducerClass varchar(80),
-    PartitionerClass varchar(80),
-    CombinerClass varchar(80),
-    InputFormatClass varchar(80),
-    InputKeyClass varchar(80),
-    InputValueClass varchar(80),
-    OutputFormatClass varchar(80),
-    OutputCompressed tinyint,
-    OutputCompressionType  varchar(20),
-    OutputCompressionCodec varchar(20),
-    OutputKeyClass varchar(80),
-    OutputValueClass varchar(80),
-    MapOutputKeyClass varchar(80),
-    MapOutputValueClass varchar(80),
-    MapOutputCompressed tinyint,
-    MapOutputCompressionType  varchar(20),
-    MapOutputCompressionCodec varchar(20),
-    InputDataSizeInMB int unsigned,
-    MapOutputSizeInMB int unsigned,
-    OutputDataSizeInMB int unsigned,
-    MR_JOBCONF  text, 
-    InputDir  text, 
-    primary key(SUBMIT_TIME, HodID, MRJobID),
-    index(SUBMIT_TIME)
-);
-
-create table if not exists mr_job_counters_template (
-    timestamp timestamp default 0,
-    job_id BIGINT,
-    hdfs_bytes_read double,
-    hdfs_bytes_written double,
-    local_bytes_read double,
-    local_bytes_written double,
-    data_local_map_tasks double,
-    launched_map_tasks double,
-    launched_reduce_tasks double,
-    combine_input_records double,
-    combine_output_records double,
-    map_input_bytes double,
-    map_output_bytes double,
-    map_input_records double,
-    map_output_records double,
-    reduce_input_groups double,
-    reduce_input_records double,
-    reduce_output_records double,
-    primary key (timestamp, job_id),
-    index(timestamp)
-);
-
-create table if not exists mr_job_timeline_template (
-    timestamp timestamp default CURRENT_TIMESTAMP,
-    mr_job_id varchar(80),
-    task_type varchar(20),
-    tasks double,
-    time double,
-    primary key(timestamp, mr_job_id),
-    index(timestamp)
-);
-
-create table if not exists mr_finish_time_vs_size_template (
-    timestamp timestamp default CURRENT_TIMESTAMP,
-    mr_job_id varchar(80),
-    task_type varchar(20),
-    size double,
-    time double,
-    primary key(timestamp, mr_job_id),
-    index(timestamp)
-);
-    
-create table if not exists hod_machine_template (
-    hodid varchar(20) not null, 
-    host varchar(40) not null,
-    index(HodId)
-);
-
-create table if not exists HodJob_template (
-    HodID varchar(20), 
-    UserID varchar(20), 
-    Status  smallint,
-    JobTracker varchar(40), 
-    TimeQueued mediumint unsigned,
-    StartTime timestamp default CURRENT_TIMESTAMP, 
-    EndTime timestamp default 0,  
-    NumOfMachines smallint unsigned,  
-    SlotLimitPerTracker smallint unsigned,
-    LogProcessStatus varchar(20),
-    primary key(HodId),
-    index(StartTime, EndTime)
-);
-
-create table if not exists hod_job_digest_template (
-    timestamp timestamp default 0,
-    HodID VARCHAR(20),
-    UserID VARCHAR(20),
-    host int,
-    load_15 double, 
-    load_5 double,
-    load_1 double,
-    task_total double,
-    task_running double,
-    task_sleep double,
-    task_stopped double,
-    task_zombie double,
-    mem_total double,
-    mem_buffers double,
-    mem_cached double,
-    mem_used double,
-    mem_free double,
-    eth0_rxerrs double,
-    eth0_rxbyts double,
-    eth0_rxpcks double,
-    eth0_rxdrops double,
-    eth0_txerrs double,
-    eth0_txbyts double,
-    eth0_txpcks double,
-    eth0_txdrops double,
-    eth1_rxerrs double,
-    eth1_rxbyts double,
-    eth1_rxpcks double,
-    eth1_rxdrops double,
-    eth1_txerrs double,
-    eth1_txbyts double,
-    eth1_txpcks double,
-    eth1_txdrops double,
-    sda_rkbs double,
-    sda_wkbs double,
-    sdb_rkbs double,
-    sdb_wkbs double,
-    sdc_rkbs double,
-    sdc_wkbs double,
-    sdd_rkbs double,
-    sdd_wkbs double,
-    cpu_idle_pcnt float,
-    cpu_nice_pcnt float,
-    cpu_system_pcnt float,
-    cpu_user_pcnt float,
-    cpu_hirq_pcnt float,
-    cpu_sirq_pcnt float,
-    iowait_pcnt float,
-    mem_buffers_pcnt float,
-    mem_used_pcnt float,
-    eth0_busy_pcnt float,
-    eth1_busy_pcnt float,
-    sda_busy_pcnt float,
-    sdb_busy_pcnt float,
-    sdc_busy_pcnt float,
-    sdd_busy_pcnt float,
-    swap_used_pcnt float,
-    primary key(HodId, timestamp),
-    index(timestamp)
-); 
-
-create table if not exists user_util_template (
-    timestamp timestamp default CURRENT_TIMESTAMP,
-    user VARCHAR(20),
-    node_total int,
-    cpu_unused double,
-    cpu_used double,
-    cpu_used_pcnt float,
-    disk_unused double,
-    disk_used double,
-    disk_used_pcnt float,
-    network_unused double,
-    network_used double,
-    network_used_pcnt float,
-    memory_unused double,
-    memory_used double,
-    memory_used_pcnt float,
-    primary key(user, timestamp),
-    index(timestamp)
-);
-
-create table if not exists QueueInfo(
-    Timestamp timestamp default 0,
-    HodID VARCHAR(20),
-    Queue VARCHAR(20),
-    NumOfMachine smallint unsigned,
-    status varchar(1),
-    index(Timestamp)
-);
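
Most of the tables above are suffixed "_template", so the live schema is apparently created by copying a template into a concrete table for each cluster or time period. A minimal SQL sketch of that step, with hypothetical table names (the actual naming and rotation are handled by the database scripts such as bin/dbSetup.sh and bin/dbAdmin.sh, which may differ):

-- hypothetical instantiation of two templates for a cluster named "demo"
create table if not exists demo_system_metrics like system_metrics_template;
create table if not exists demo_disk like disk_template;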

+ 0 - 43
src/contrib/chukwa/conf/fields.spec

@@ -1,43 +0,0 @@
-CombinerClass CombinerClass
-AvgLocalMapperExecutionTime AvgLocalMapperExecutionTime
-AvgMapperExecutionTime AvgMapperExecutionTime
-AvgRackLocalMapperExecutionTime AvgRackLocalMapperExecutionTime
-AvgReducerExecutionTime AvgReducerExecutionTime
-AvgRemoteMapperExecutionTime AvgRemoteMapperExecutionTime
-AvgShuffleExecutionTime AvgShuffleExecutionTime
-AvgSortExecutionTime AvgSortExecutionTime
-FINISH_TIME FINISH_TIME
-InputDataSizeInMB InputDataSizeInMB
-InputDir InputDir
-InputFormatClass InputFormatClass
-InputKeyClass InputKeyClass
-InputValueClass InputValueClass
-LAUNCH_TIME LAUNCH_TIME
-MAPPER_PHASE_END_TIME MAPPER_PHASE_END_TIME
-MAPPER_PHASE_EXECUTION_TIME MAPPER_PHASE_EXECUTION_TIME
-MRJobID MRJobID
-MRJobName MRJobName
-MapOutputCompressed MapOutputCompressed
-MapOutputCompressionCodec MapOutputCompressionCodec
-MapOutputCompressionType MapOutputCompressionType 
-MapOutputKeyClass MapOutputKeyClass
-MapOutputValueClass MapOutputValueClass
-MapOutputSizeInMB MapOutputSizeInMB
-MapperClass MapperClass
-NUM_OF_MAPPER_ATTEMPTS NUM_OF_MAPPER_ATTEMPTS
-NUM_OF_REDUCER_ATTEMPTS NUM_OF_REDUCER_ATTEMPTS
-NumOfLocalSplits NumOfLocalSplits
-NumOfRackLocalSplits NumOfRackLocalSplits
-OutputCompressed OutputCompressed
-OutputCompressionCodec OutputCompressionCodec
-OutputCompressionType OutputCompressionType 
-OutputDataSizeInMB OutputDataSizeInMB
-OutputFormatClass OutputFormatClass
-OutputKeyClass OutputKeyClass
-OutputValueClass OutputValueClass
-PartitionerClass PartitionerClass
-ReducerClass ReducerClass
-STATUS STATUS
-SUBMIT_TIME SUBMIT_TIME
-TOTAL_MAPS TOTAL_MAPS
-TOTAL_REDUCES TOTAL_REDUCES

+ 0 - 111
src/contrib/chukwa/conf/hadoop-log4j.properties

@@ -1,111 +0,0 @@
-# Define some default values that can be overridden by system properties
-hadoop.root.logger=INFO,console
-hadoop.log.dir=.
-hadoop.log.file=hadoop.log
-
-# Define the root logger to the system property "hadoop.root.logger".
-log4j.rootLogger=${hadoop.root.logger}, EventCounter
-
-# Logging Threshold
-log4j.threshhold=ALL
-
-#
-# Daily Rolling File Appender
-#
-
-#log4j.appender.DRFA=org.apache.log4j.DailyRollingFileAppender
-#log4j.appender.DRFA.File=${hadoop.log.dir}/${hadoop.log.file}
-
-#
-# CHUKWA
-#
-log4j.appender.DRFA=org.apache.hadoop.chukwa.inputtools.log4j.ChukwaDailyRollingFileAppender
-log4j.appender.DRFA.File=${hadoop.log.dir}/${hadoop.log.file}
-log4j.appender.DRFA.recordType=HadoopLog
-log4j.appender.DRFA.chukwaClientHostname=localhost
-log4j.appender.DRFA.chukwaClientPortNum=9093
-
-
-# Rollover at midnight
-log4j.appender.DRFA.DatePattern=.yyyy-MM-dd
-
-# 30-day backup
-#log4j.appender.DRFA.MaxBackupIndex=30
-log4j.appender.DRFA.layout=org.apache.log4j.PatternLayout
-
-# Pattern format: Date LogLevel LoggerName LogMessage
-log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
-# Debugging Pattern format
-#log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n
-
-#
-# AUDIT LOGGING - All audit events are logged at INFO level
-#
-# CHUKWA AUDIT LOG
-
-log4j.appender.DRFAAUDIT=org.apache.hadoop.chukwa.inputtools.log4j.ChukwaDailyRollingFileAppender
-log4j.appender.DRFAAUDIT.File=${hadoop.log.dir}/audit.log
-log4j.appender.DRFAAUDIT.recordType=HadoopLog
-log4j.appender.DRFAAUDIT.chukwaClientHostname=localhost
-log4j.appender.DRFAAUDIT.chukwaClientPortNum=9093
-log4j.appender.DRFAAUDIT.DatePattern=.yyyy-MM-dd
-log4j.appender.DRFAAUDIT.layout=org.apache.log4j.PatternLayout
-log4j.appender.DRFAAUDIT.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
-log4j.logger.org.apache.hadoop.fs.FSNamesystem.audit=INFO,DRFAAUDIT
-log4j.additivity.org.apache.hadoop.fs.FSNamesystem.audit=false
-
-#
-# console
-# Add "console" to rootlogger above if you want to use this 
-#
-
-log4j.appender.console=org.apache.log4j.ConsoleAppender
-log4j.appender.console.target=System.err
-log4j.appender.console.layout=org.apache.log4j.PatternLayout
-log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n
-
-#
-# TaskLog Appender
-#
-
-#Default values
-hadoop.tasklog.taskid=null
-hadoop.tasklog.noKeepSplits=4
-hadoop.tasklog.totalLogFileSize=100
-hadoop.tasklog.purgeLogSplits=true
-hadoop.tasklog.logsRetainHours=12
-
-log4j.appender.TLA=org.apache.hadoop.mapred.TaskLogAppender
-log4j.appender.TLA.taskId=${hadoop.tasklog.taskid}
-log4j.appender.TLA.totalLogFileSize=${hadoop.tasklog.totalLogFileSize}
-
-log4j.appender.TLA.layout=org.apache.log4j.PatternLayout
-log4j.appender.TLA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
-
-#
-# Rolling File Appender
-#
-
-#log4j.appender.RFA=org.apache.log4j.RollingFileAppender
-#log4j.appender.RFA.File=${hadoop.log.dir}/${hadoop.log.file}
-
-# Logfile size and 30-day backups
-#log4j.appender.RFA.MaxFileSize=1MB
-#log4j.appender.RFA.MaxBackupIndex=30
-
-#log4j.appender.RFA.layout=org.apache.log4j.PatternLayout
-#log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} - %m%n
-#log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n
-
-# Custom Logging levels
-
-#log4j.logger.org.apache.hadoop.mapred.JobTracker=DEBUG
-#log4j.logger.org.apache.hadoop.mapred.TaskTracker=DEBUG
-#log4j.logger.org.apache.hadoop.fs.FSNamesystem=DEBUG
-
-#
-# Event Counter Appender
-# Sends counts of logging messages at different severity levels to Hadoop Metrics.
-#
-log4j.appender.EventCounter=org.apache.hadoop.metrics.jvm.EventCounter
-
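
The DRFAAUDIT block above illustrates the general pattern for routing one specific logger through Chukwa: define a ChukwaDailyRollingFileAppender, point it at the local agent on port 9093, attach it to the logger, and disable additivity. A hedged sketch of the same pattern applied to another logger; the appender name, file name and logger choice below are illustrative, not part of the original file:

log4j.appender.DRFAJT=org.apache.hadoop.chukwa.inputtools.log4j.ChukwaDailyRollingFileAppender
log4j.appender.DRFAJT.File=${hadoop.log.dir}/jobtracker-extra.log
log4j.appender.DRFAJT.recordType=HadoopLog
log4j.appender.DRFAJT.chukwaClientHostname=localhost
log4j.appender.DRFAJT.chukwaClientPortNum=9093
log4j.appender.DRFAJT.DatePattern=.yyyy-MM-dd
log4j.appender.DRFAJT.layout=org.apache.log4j.PatternLayout
log4j.appender.DRFAJT.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
log4j.logger.org.apache.hadoop.mapred.JobTracker=INFO,DRFAJT
log4j.additivity.org.apache.hadoop.mapred.JobTracker=false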

+ 0 - 11
src/contrib/chukwa/conf/hadoop-metrics.properties

@@ -1,11 +0,0 @@
-dfs.class=org.apache.hadoop.chukwa.inputtools.log4j.Log4JMetricsContext
-dfs.period=60
-
-jvm.class=org.apache.hadoop.chukwa.inputtools.log4j.Log4JMetricsContext
-jvm.period=60
-
-mapred.class=org.apache.hadoop.chukwa.inputtools.log4j.Log4JMetricsContext
-mapred.period=60
-
-rpc.class=org.apache.hadoop.chukwa.inputtools.log4j.Log4JMetricsContext
-rpc.period=60
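
Each context above (dfs, jvm, mapred, rpc) emits its metrics every 60 seconds through Log4JMetricsContext, which appears to pair with the matching "chukwa.<context>" appenders defined in chukwa-hadoop-metrics-log4j.properties earlier (same record type, agent host and port 9093). Adding a further context would presumably follow the same two-sided pattern; a hedged sketch with a purely hypothetical context name:

# hypothetical extra context routed the same way (name is illustrative)
mycontext.class=org.apache.hadoop.chukwa.inputtools.log4j.Log4JMetricsContext
mycontext.period=60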

+ 0 - 1
src/contrib/chukwa/conf/initial_adaptors.template

@@ -1 +0,0 @@
-add org.apache.hadoop.chukwa.datacollection.adaptor.filetailer.CharFileTailingAdaptorUTF8NewLineEscaped SysLog 0 /var/log/messages 0
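
The single entry above follows the general shape of an agent "add" command: add <adaptor class> <datatype> <adaptor-specific parameters> <initial offset>; for the character file tailer the parameters appear to be a start offset plus the file to tail. A hedged second example using the same adaptor class and the HadoopLog record type seen elsewhere in these conf files (the log path is illustrative, not part of the original file):

add org.apache.hadoop.chukwa.datacollection.adaptor.filetailer.CharFileTailingAdaptorUTF8NewLineEscaped HadoopLog 0 /var/log/hadoop/hadoop.log 0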

+ 0 - 1
src/contrib/chukwa/conf/jdbc.conf

@@ -1 +0,0 @@
-unknown=jdbc:://localhost:3306/demo?user=example

+ 0 - 1
src/contrib/chukwa/conf/jdbc.conf.template

@@ -1 +0,0 @@
-unknown=jdbc:://localhost:3306/demo?user=example

+ 0 - 8
src/contrib/chukwa/conf/joblog.properties

@@ -1,8 +0,0 @@
-log4j.rootLogger=INFO, R 
-log4j.appender.R=org.apache.log4j.RollingFileAppender
-log4j.appender.R.File=${CHUKWA_HOME}/logs/joblog.log
-log4j.appender.R.MaxFileSize=10MB
-log4j.appender.R.MaxBackupIndex=10
-log4j.appender.R.layout=org.apache.log4j.PatternLayout
-log4j.appender.R.layout.ConversionPattern=%d{ISO8601} %p %t %c{1} - %m%n
-log4j.logger.org.apache.hadoop.chukwa.ikit.DataConfig=INFO, R

+ 0 - 12
src/contrib/chukwa/conf/log4j.properties

@@ -1,12 +0,0 @@
-log4j.rootLogger=INFO, R
-log4j.appender.R=org.apache.log4j.RollingFileAppender
-log4j.appender.R.File=${CHUKWA_LOG_DIR}/chukwa.log
-log4j.appender.R.MaxFileSize=10MB
-log4j.appender.R.MaxBackupIndex=10
-log4j.appender.R.layout=org.apache.log4j.PatternLayout
-log4j.appender.R.layout.ConversionPattern=%d{ISO8601} %p %t %c{1} - %m%n
-
-log4j.appender.stdout=org.apache.log4j.ConsoleAppender
-log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
-log4j.appender.stdout.layout.ConversionPattern=%d{ISO8601} %p %t %c{1} - %m%n
-

+ 0 - 1263
src/contrib/chukwa/conf/mdl.xml.template

@@ -1,1263 +0,0 @@
-<?xml version="1.0"?>
-<?xml-stylesheet type="text/xsl" href="nutch-conf.xsl"?>
-
-<!-- Put site-specific property overrides in this file. -->
-
-<configuration>
-<!-- database tables -->
-
-<property>
-  <name>report.db.name.nodeactivity</name>
-  <value>node_activity</value>
-  <description></description>
-</property>
-  
-<property>
-  <name>report.db.primary.key.nodeactivity</name>
-  <value>timestamp</value>
-</property>
-
-<property>
-  <name>metric.nodeactivity.down</name>
-  <value>down</value>
-</property>
-
-<property>
-  <name>metric.nodeactivity.downmachines</name>
-  <value>downMachines</value>
-</property>
-
-<property>
-  <name>metric.nodeactivity.free</name>
-  <value>free</value>
-</property>
-
-<property>
-  <name>metric.nodeactivity.freemachines</name>
-  <value>freeMachines</value>
-</property>
-
-<property>
-  <name>metric.nodeactivity.used</name>
-  <value>used</value>
-</property>
-
-<property>
-  <name>metric.nodeactivity.usedmachines</name>
-  <value>usedMachines</value>
-</property>
-
-
-<property>
-  <name>report.db.name.hod_job_digest</name>
-  <value>hod_job_digest</value>
-</property>
-
-<property>
-  <name>report.db.name.cluster_system_metrics</name>
-  <value>cluster_system_metrics</value>
-</property>
-
-<property>
-  <name>report.db.name.systemmetrics</name>
-  <value>system_metrics</value>
-  <description></description>
-</property>
-
-<property>
-  <name>report.db.name.df</name>
-  <value>disk</value>
-</property>
-
-<property>
-  <name>report.db.name.cluster_disk</name>
-  <value>cluster_disk</value>
-</property>
-
-<property>
-  <name>report.db.name.hadoop_dfs_namenode</name>
-  <value>dfs_namenode</value>
-</property>
-
-<property>
-  <name>report.db.name.hadoop_dfs_datanode</name>
-  <value>dfs_datanode</value>
-</property>
-
-<property>
-  <name>report.db.name.hadoop_dfs_throughput</name>
-  <value>dfs_throughput</value>
-</property>
-
-<property>
-  <name>report.db.name.hadoop_dfs_fsnamesystem</name>
-  <value>dfs_fsnamesystem</value>
-</property>
-
-<property>
-  <name>report.db.name.hadoop_dfs_fsdirectory</name>
-  <value>dfs_namenode</value>
-</property>
-
-<property>
-  <name>report.db.name.hadoop_jvm_metrics</name>
-  <value>hadoop_jvm</value>
-</property>
-
-<property>
-  <name>report.db.name.hadoop_mapred_jobtracker</name>
-  <value>hadoop_mapred</value>
-</property>
-
-<property>
-  <name>report.db.name.hadoop_rpc_metrics</name>
-  <value>hadoop_rpc</value>
-</property>
-
-<property>
-  <name>report.db.name.cluster_hadoop_rpc</name>
-  <value>cluster_hadoop_rpc</value>
-</property>
-
-<property>
-  <name>report.db.name.mssrgraph</name>
-  <value>mssrgraph</value>
-</property>
-
-<property>
-  <name>report.db.name.mrjobcounters</name>
-  <value>MRJobCounters</value>
-</property>
-
-<property>
-  <name>report.db.name.hodjob</name>
-  <value>HodJob</value>
-</property>
-
-<property>
-  <name>report.db.name.hodmachine</name>
-  <value>hod_machine</value>
-</property>
-
-<property>
-  <name>report.db.name.mrjob</name>
-  <value>MRJob</value>
-</property>
-
-<property>
-  <name>report.db.name.mrjobts</name>
-  <value>MRJobTSData</value>
-</property>
-
-<property>
-  <name>report.db.name.hodjobunprocessed</name>
-  <value>HodJobUnprocessed</value>
-</property>
-
-<property>
-  <name>report.db.name.hodjobdigest</name>
-  <value>HodJobDigest</value>
-</property>
-
-<property>
-  <name>report.db.name.queueInfo</name>
-  <value>QueueInfo</value>
-</property>
-
-<property>
-  <name>report.db.name.jobcounters</name>
-  <value>MRJobCounters</value>
-</property>
-
-<property>
-  <name>report.db.name.user_util</name>
-  <value>user_util</value>
-</property>
-
-<!-- System Metrics Config -->
-<property>
-  <name>report.db.primary.key.systemmetrics</name>
-  <value>timestamp</value>
-</property>
-
-<property>
-  <name>metric.systemmetrics.csource</name>
-  <value>host</value>
-</property>
-
-<property>
-  <name>metric.systemmetrics.ldavg-1</name>
-  <value>load_1</value>
-</property>
-
-<property>
-  <name>metric.systemmetrics.ldavg-5</name>
-  <value>load_5</value>
-</property>
-
-<property>
-  <name>metric.systemmetrics.ldavg-15</name>
-  <value>load_15</value>
-</property>
-
-<property>
-  <name>metric.systemmetrics.tasks_total</name>
-  <value>task_total</value>
-</property>
-<property>
-  <name>metric.systemmetrics.tasks_running</name>
-  <value>task_running</value>
-</property>
-
-<property>
-  <name>metric.systemmetrics.tasks_sleeping</name>
-  <value>task_sleep</value>
-</property>
-
-<property>
-  <name>metric.systemmetrics.tasks_stopped</name>
-  <value>task_stopped</value>
-</property>
-
-<property>
-  <name>metric.systemmetrics.tasks_zombie</name>
-  <value>task_zombie</value>
-</property>
-
-<property>
-  <name>metric.systemmetrics.mem_total</name>
-  <value>mem_total</value>
-</property>
-
-<property>
-  <name>metric.systemmetrics.mem_buffers</name>
-  <value>mem_buffers</value>
-</property>
-
-<property>
-  <name>metric.systemmetrics.mem_free</name>
-  <value>mem_free</value>
-</property>
-
-<property>
-  <name>metric.systemmetrics.mem_used</name>
-  <value>mem_used</value>
-</property>
-
-<property>
-  <name>metric.systemmetrics.mem_shared</name>
-  <value>mem_shared</value>
-</property>
-
-<property>
-  <name>metric.systemmetrics.kbcached</name>
-  <value>mem_cached</value>
-</property>
-
-<property>
-  <name>metric.systemmetrics.eth0.rxerr/s</name>
-  <value>eth0_rxerrs</value>
-</property>
-
-<property>
-  <name>metric.systemmetrics.eth0.rxbyt/s</name>
-  <value>eth0_rxbyts</value>
-</property>
-
-<property>
-  <name>metric.systemmetrics.eth0.rxpck/s</name>
-  <value>eth0_rxpcks</value>
-</property>
-
-<property>
-  <name>metric.systemmetrics.eth0.rxdrop/s</name>
-  <value>eth0_rxdrops</value>
-</property>
-
-<property>
-  <name>metric.systemmetrics.eth0.txerr/s</name>
-  <value>eth0_txerrs</value>
-</property>
-
-<property>
-  <name>metric.systemmetrics.eth0.txbyt/s</name>
-  <value>eth0_txbyts</value>
-</property>
-
-<property>
-  <name>metric.systemmetrics.eth0.txpck/s</name>
-  <value>eth0_txpcks</value>
-</property>
-
-<property>
-  <name>metric.systemmetrics.eth0.txdrop/s</name>
-  <value>eth0_txdrops</value>
-</property>
-
-<property>
-  <name>metric.systemmetrics.eth1.rxerr/s</name>
-  <value>eth1_rxerrs</value>
-</property>
-
-<property>
-  <name>metric.systemmetrics.eth1.rxbyt/s</name>
-  <value>eth1_rxbyts</value>
-</property>
-
-<property>
-  <name>metric.systemmetrics.eth1.rxpck/s</name>
-  <value>eth1_rxpcks</value>
-</property>
-
-<property>
-  <name>metric.systemmetrics.eth1.rxdrop/s</name>
-  <value>eth1_rxdrops</value>
-</property>
-
-<property>
-  <name>metric.systemmetrics.eth1.txerr/s</name>
-  <value>eth1_txerrs</value>
-</property>
-
-<property>
-  <name>metric.systemmetrics.eth1.txbyt/s</name>
-  <value>eth1_txbyts</value>
-</property>
-
-<property>
-  <name>metric.systemmetrics.eth1.txpck/s</name>
-  <value>eth1_txpcks</value>
-</property>
-
-<property>
-  <name>metric.systemmetrics.eth1.txdrop/s</name>
-  <value>eth1_txdrops</value>
-</property>
-
-<property>
-  <name>metric.systemmetrics.sda.rkb/s</name>
-  <value>sda_rkbs</value>
-</property>
-
-<property>
-  <name>metric.systemmetrics.sda.wkb/s</name>
-  <value>sda_wkbs</value>
-</property>
-
-<property>
-  <name>metric.systemmetrics.sdb.rkb/s</name>
-  <value>sdb_rkbs</value>
-</property>
-
-<property>
-  <name>metric.systemmetrics.sdb.wkb/s</name>
-  <value>sdb_wkbs</value>
-</property>
-
-<property>
-  <name>metric.systemmetrics.sdc.rkb/s</name>
-  <value>sdc_rkbs</value>
-</property>
-
-<property>
-  <name>metric.systemmetrics.sdc.wkb/s</name>
-  <value>sdc_wkbs</value>
-</property>
-
-<property>
-  <name>metric.systemmetrics.sdd.rkb/s</name>
-  <value>sdd_rkbs</value>
-</property>
-
-<property>
-  <name>metric.systemmetrics.sdd.wkb/s</name>
-  <value>sdd_wkbs</value>
-</property>
-
-<property>
-  <name>metric.systemmetrics.%idle</name>
-  <value>cpu_idle_pcnt</value>
-</property>
-
-<property>
-  <name>metric.systemmetrics.%nice</name>
-  <value>cpu_nice_pcnt</value>
-</property>
-
-<property>
-  <name>metric.systemmetrics.%sys</name>
-  <value>cpu_system_pcnt</value>
-</property>
-
-<property>
-  <name>metric.systemmetrics.%user</name>
-  <value>cpu_user_pcnt</value>
-</property>
-
-<property>
-  <name>metric.systemmetrics.cpu_hi%</name>
-  <value>cpu_hirq_pcnt</value>
-</property>
-
-<property>
-  <name>metric.systemmetrics.cpu_si%</name>
-  <value>cpu_sirq_pcnt</value>
-</property>
-
-<property>
-  <name>metric.systemmetrics.%iowait</name>
-  <value>iowait_pcnt</value>
-</property>
-
-<property>
-  <name>metric.systemmetrics.mem_buffers_pcnt</name>
-  <value>mem_buffers_pcnt</value>
-</property>
-
-<property>
-  <name>metric.systemmetrics.mem_cached_pcnt</name>
-  <value>mem_cached_pcnt</value>
-</property>
-
-<property>
-  <name>metric.systemmetrics.%memused</name>
-  <value>mem_used_pcnt</value>
-</property>
-
-<property>
-  <name>metric.systemmetrics.eth0_busy_pcnt</name>
-  <value>eth0_busy_pcnt</value>
-</property>
-
-<property>
-  <name>metric.systemmetrics.eth1_busy_pcnt</name>
-  <value>eth1_busy_pcnt</value>
-</property>
-
-<property>
-  <name>metric.systemmetrics.sda.%util</name>
-  <value>sda_busy_pcnt</value>
-</property>
-
-<property>
-  <name>metric.systemmetrics.sdb.%util</name>
-  <value>sdb_busy_pcnt</value>
-</property>
-
-<property>
-  <name>metric.systemmetrics.sdc.%util</name>
-  <value>sdc_busy_pcnt</value>
-</property>
-
-<property>
-  <name>metric.systemmetrics.sdd.%util</name>
-  <value>sdd_busy_pcnt</value>
-</property>
-
-<property>
-  <name>metric.systemmetrics.swap_used_pcnt</name>
-  <value>swap_used_pcnt</value>
-</property>
-
-<property>
-  <name>report.db.primary.key.df</name>
-  <value>timestamp</value>
-</property>
-
-<property>
-  <name>metric.df.available</name>
-  <value>available</value>
-</property>
-
-<property>
-  <name>metric.df.used</name>
-  <value>used</value>
-</property>
-
-<property>
-  <name>metric.df.use%</name>
-  <value>used_percent</value>
-</property>
-
-<property>
-  <name>metric.df.mounted-on</name>
-  <value>mount</value>
-</property>
-
-<property>
-  <name>metric.df.filesystem</name>
-  <value>fs</value>
-</property>
-
-<property>
-  <name>metric.df.csource</name>
-  <value>host</value>
-</property>
-
-<!-- dfs name node metrics -->
-<property>
-  <name>report.db.primary.key.hadoop_dfs_namenode</name>
-  <value>timestamp</value>
-</property>
-
-<property>
-  <name>metric.hadoop_dfs_namenode.csource</name>
-  <value>host</value>
-</property>
-
-<property>
-  <name>metric.hadoop_dfs_namenode.addblockops</name>
-  <value>add_block_ops</value>
-</property>
-
-<property>
-  <name>metric.hadoop_dfs_namenode.blockscorrupted</name>
-  <value>blocks_corrupted</value>
-</property>
-
-<property>
-  <name>metric.hadoop_dfs_namenode.createfileops</name>
-  <value>create_file_ops</value>
-</property>
-
-<property>
-  <name>metric.hadoop_dfs_namenode.deletefileops</name>
-  <value>delete_file_ops</value>
-</property>
-
-<property>
-  <name>metric.hadoop_dfs_namenode.filescreated</name>
-  <value>files_created</value>
-</property>
-
-<property>
-  <name>metric.hadoop_dfs_namenode.filesrenamed</name>
-  <value>files_renamed</value>
-</property>
-
-<property>
-  <name>metric.hadoop_dfs_namenode.getblocklocations</name>
-  <value>get_block_locations</value>
-</property>
-
-<property>
-  <name>metric.hadoop_dfs_namenode.getlistingops</name>
-  <value>get_listing_ops</value>
-</property>
-
-<property>
-  <name>metric.hadoop_dfs_namenode.safemodetime</name>
-  <value>safe_mode_time</value>
-</property>
-
-<property>
-  <name>metric.hadoop_dfs_namenode.syncs_avg_time</name>
-  <value>syncs_avg_time</value>
-</property>
-
-<property>
-  <name>metric.hadoop_dfs_namenode.syncs_num_ops</name>
-  <value>syncs_num_ops</value>
-</property>
-
-<property>
-  <name>metric.hadoop_dfs_namenode.transactions_avg_time</name>
-  <value>transactions_avg_time</value>
-</property>
-
-<property>
-  <name>metric.hadoop_dfs_namenode.transactions_num_ops</name>
-  <value>transactions_num_ops</value>
-</property>
-
-<property>
-  <name>metric.hadoop_dfs_namenode.blockreport_avg_time</name>
-  <value>block_report_avg_time</value>
-</property>
-
-<property>
-  <name>metric.hadoop_dfs_namenode.blockreport_num_ops</name>
-  <value>block_report_num_ops</value>
-</property>
-
-<property>
-  <name>metric.hadoop_dfs_namenode.fsimageloadtime</name>
-  <value>fs_image_load_time</value>
-</property>
-
-<!-- dfs data node -->
-<property>
-  <name>report.db.primary.key.hadoop_dfs_datanode</name>
-  <value>timestamp</value>
-</property>
-
-<property>
-  <name>metric.hadoop_dfs_datanode.hostname</name>
-  <value>host</value>
-</property>
-
-<property>
-  <name>metric.hadoop_dfs_datanode.blockreports_avg_time</name>
-  <value>block_reports_avg_time</value>
-</property>
-
-<property>
-  <name>metric.hadoop_dfs_datanode.blockreports_num_ops</name>
-  <value>block_reports_num_ops</value>
-</property>
-
-<property>
-  <name>metric.hadoop_dfs_datanode.block_verification_failures</name>
-  <value>block_verification_failures</value>
-</property>
-
-<property>
-  <name>metric.hadoop_dfs_datanode.blocks_read</name>
-  <value>blocks_read</value>
-</property>
-
-<property>
-  <name>metric.hadoop_dfs_datanode.blocks_removed</name>
-  <value>blocks_removed</value>
-</property>
-
-<property>
-  <name>metric.hadoop_dfs_datanode.blocks_replicated</name>
-  <value>blocks_replicated</value>
-</property>
-
-<property>
-  <name>metric.hadoop_dfs_datanode.blocks_verified</name>
-  <value>blocks_verified</value>
-</property>
-
-<property>
-  <name>metric.hadoop_dfs_datanode.blocks_written</name>
-  <value>blocks_written</value>
-</property>
-
-<property>
-  <name>metric.hadoop_dfs_datanode.bytes_read</name>
-  <value>bytes_read</value>
-</property>
-
-<property>
-  <name>metric.hadoop_dfs_datanode.bytes_written</name>
-  <value>bytes_written</value>
-</property>
-
-<property>
-  <name>metric.hadoop_dfs_datanode.copyblockop_avg_time</name>
-  <value>copy_block_op_avg_time</value>
-</property>
-
-<property>
-  <name>metric.hadoop_dfs_datanode.copyblockop_num_ops</name>
-  <value>copy_block_op_num_ops</value>
-</property>
-
-<property>
-  <name>metric.hadoop_dfs_datanode.heartbeats_avg_time</name>
-  <value>heart_beats_avg_time</value>
-</property>
-
-<property>
-  <name>metric.hadoop_dfs_datanode.heartbeats_num_ops</name>
-  <value>heart_beats_num_ops</value>
-</property>
-
-<property>
-  <name>metric.hadoop_dfs_datanode.readblockop_avg_time</name>
-  <value>read_block_op_avg_time</value>
-</property>
-
-<property>
-  <name>metric.hadoop_dfs_datanode.readblockop_num_ops</name>
-  <value>read_block_op_num_ops</value>
-</property>
-
-<property>
-  <name>metric.hadoop_dfs_datanode.readmetadataop_avg_time</name>
-  <value>read_metadata_op_avg_time</value>
-</property>
-
-<property>
-  <name>metric.hadoop_dfs_datanode.readmetadataop_num_ops</name>
-  <value>read_metadata_op_num_ops</value>
-</property>
-
-<property>
-  <name>metric.hadoop_dfs_datanode.reads_from_local_client</name>
-  <value>reads_from_local_client</value>
-</property>
-
-<property>
-  <name>metric.hadoop_dfs_datanode.reads_from_remote_client</name>
-  <value>reads_from_remote_client</value>
-</property>
-
-<property>
-  <name>metric.hadoop_dfs_datanode.replaceblockop_avg_time</name>
-  <value>replace_block_op_avg_time</value>
-</property>
-
-<property>
-  <name>metric.hadoop_dfs_datanode.replaceblockop_num_ops</name>
-  <value>replace_block_op_num_ops</value>
-</property>
-
-<property>
-  <name>metric.hadoop_dfs_datanode.writeblockop_avg_time</name>
-  <value>write_block_op_avg_time</value>
-</property>
-
-<property>
-  <name>metric.hadoop_dfs_datanode.writeblockop_num_ops</name>
-  <value>write_block_op_num_ops</value>
-</property>
-
-<property>
-  <name>metric.hadoop_dfs_datanode.writes_from_local_client</name>
-  <value>writes_from_local_client</value>
-</property>
-
-<property>
-  <name>metric.hadoop_dfs_datanode.writes_from_remote_client</name>
-  <value>writes_from_remote_client</value>
-</property>
-
-<!-- dfs fs name system status -->
-<property>
-  <name>report.db.primary.key.hadoop_dfs_fsnamesystem</name>
-  <value>timestamp</value>
-</property>
-<property>
-  <name>metric.hadoop_dfs_fsnamesystem.csource</name>
-  <value>host</value>
-</property>
-<property>
-  <name>metric.hadoop_dfs_fsnamesystem.blockstotal</name>
-  <value>blocks_total</value>
-</property>
-<property>
-  <name>metric.hadoop_dfs_fsnamesystem.capacityremaininggb</name>
-  <value>capacity_remaining_gb</value>
-</property>
-<property>
-  <name>metric.hadoop_dfs_fsnamesystem.capacitytotalgb</name>
-  <value>capacity_total_gb</value>
-</property>
-<property>
-  <name>metric.hadoop_dfs_fsnamesystem.capacityusedgb</name>
-  <value>capacity_used_gb</value>
-</property>
-<property>
-  <name>metric.hadoop_dfs_fsnamesystem.filestotal</name>
-  <value>files_total</value>
-</property>
-<property>
-  <name>metric.hadoop_dfs_fsnamesystem.pendingreplicationblocks</name>
-  <value>pending_replication_blocks</value>
-</property>
-<property>
-  <name>metric.hadoop_dfs_fsnamesystem.scheduledreplicationblocks</name>
-  <value>scheduled_replication_blocks</value>
-</property>
-<property>
-  <name>metric.hadoop_dfs_fsnamesystem.totalload</name>
-  <value>total_load</value>
-</property>
-<property>
-  <name>metric.hadoop_dfs_fsnamesystem.underreplicatedblocks</name>
-  <value>under_replicated_blocks</value>
-</property>
-
-<!-- dfs fsdirectory metrics -->
-<property>
-  <name>report.db.primary.key.hadoop_dfs_fsdirectory</name>
-  <value>timestamp</value>
-</property>
-
-<property>
-  <name>metric.hadoop_dfs_fsdirectory.csource</name>
-  <value>host</value>
-</property>
- <property>
-  <name>metric.hadoop_dfs_fsdirectory.files_deleted</name>
-  <value>files_deleted</value>
-</property>
-
-<!-- hadoop jvm metrics -->
-<property>
-  <name>report.db.primary.key.hadoop_jvm_metrics</name>
-  <value>timestamp</value>
-</property>
-
-<property>
-  <name>metric.hadoop_jvm_metrics.csource</name>
-  <value>host</value>
-</property>
-
-<property>
-  <name>metric.hadoop_jvm_metrics.gctimemillis</name>
-  <value>gc_timemillis</value>
-</property>
-
-<property>
-  <name>metric.hadoop_jvm_metrics.gccount</name>
-  <value>gc_count</value>
-</property>
-
-<property>
-  <name>metric.hadoop_jvm_metrics.logerror</name>
-  <value>log_error</value>
-</property>
-
-<property>
-  <name>metric.hadoop_jvm_metrics.logfatal</name>
-  <value>log_fatal</value>
-</property>
-
-<property>
-  <name>metric.hadoop_jvm_metrics.loginfo</name>
-  <value>log_info</value>
-</property>
-
-<property>
-  <name>metric.hadoop_jvm_metrics.logwarn</name>
-  <value>log_warn</value>
-</property>
-
-<property>
-  <name>metric.hadoop_jvm_metrics.memheapcommittedm</name>
-  <value>mem_heap_committed_m</value>
-</property>
-
-<property>
-  <name>metric.hadoop_jvm_metrics.memheapusedm</name>
-  <value>mem_heap_used_m</value>
-</property>
-
-<property>
-  <name>metric.hadoop_jvm_metrics.memnonheapcommittedm</name>
-  <value>mem_non_heap_committed_m</value>
-</property>
-
-<property>
-  <name>metric.hadoop_jvm_metrics.memnonheapusedm</name>
-  <value>mem_non_heap_used_m</value>
-</property>
-
-<property>
-  <name>metric.hadoop_jvm_metrics.processname</name>
-  <value>process_name</value>
-</property>
-
-<property>
-  <name>metric.hadoop_jvm_metrics.threadsblocked</name>
-  <value>threads_blocked</value>
-</property>
-
-<property>
-  <name>metric.hadoop_jvm_metrics.threadsnew</name>
-  <value>threads_new</value>
-</property>
-
-<property>
-  <name>metric.hadoop_jvm_metrics.threadsrunnable</name>
-  <value>threads_runnable</value>
-</property>
-
-<property>
-  <name>metric.hadoop_jvm_metrics.threadsterminated</name>
-  <value>threads_terminated</value>
-</property>
-
-<property>
-  <name>metric.hadoop_jvm_metrics.threadstimedwaiting</name>
-  <value>threads_timed_waiting</value>
-</property>
-
-<property>
-  <name>metric.hadoop_jvm_metrics.threadswaiting</name>
-  <value>threads_waiting</value>
-</property>
-
-<!-- hadoop map/reduce metrics -->
-<property>
-  <name>report.db.primary.key.hadoop_mapred_jobtracker</name>
-  <value>timestamp</value>
-</property>
-
-<property>
-  <name>metric.hadoop_mapred_jobtracker.csource</name>
-  <value>host</value>
-</property>
-<property>
-  <name>metric.hadoop_mapred_jobtracker.jobs_completed</name>
-  <value>jobs_completed</value>
-</property>
-<property>
-  <name>metric.hadoop_mapred_jobtracker.jobs_submitted</name>
-  <value>jobs_submitted</value>
-</property>
-<property>
-  <name>metric.hadoop_mapred_jobtracker.maps_completed</name>
-  <value>maps_completed</value>
-</property>
-
-<property>
-  <name>metric.hadoop_mapred_jobtracker.maps_launched</name>
-  <value>maps_launched</value>
-</property>
-
-<property>
-  <name>metric.hadoop_mapred_jobtracker.reduces_completed</name>
-  <value>reduces_completed</value>
-</property>
-
-<property>
-  <name>metric.hadoop_mapred_jobtracker.reduces_launched</name>
-  <value>reduces_launched</value>
-</property>
-
-<!-- hadoop rpc metrics -->
-<property>
-  <name>report.db.primary.key.hadoop_rpc_metrics</name>
-  <value>timestamp</value>
-</property>
-
-<property>
-  <name>metric.hadoop_rpc_metrics.csource</name>
-  <value>host</value>
-</property>
-
-<property>
-  <name>metric.hadoop_rpc_metrics.rpcprocessingtime_avg_time</name>
-  <value>rpc_processing_time_avg_time</value>
-</property>
-<property>
-  <name>metric.hadoop_rpc_metrics.rpcprocessingtime_num_ops</name>
-  <value>rpc_processing_time_num_ops</value>
-</property>
-
-<property>
-  <name>metric.hadoop_rpc_metrics.getbuildversion_avg_time</name>
-  <value>get_build_version_avg_time</value>
-</property>
-<property>
-  <name>metric.hadoop_rpc_metrics.getbuildversion_num_ops</name>
-  <value>get_build_version_num_ops</value>
-</property>
-<property>
-  <name>metric.hadoop_rpc_metrics.getjobcounters_avg_time</name>
-  <value>get_job_counters_avg_time</value>
-</property>
-<property>
-  <name>metric.hadoop_rpc_metrics.getjobcounters_num_ops</name>
-  <value>get_job_counters_num_ops</value>
-</property>
-<property>
-  <name>metric.hadoop_rpc_metrics.getjobprofile_avg_time</name>
-  <value>get_job_profile_avg_time</value>
-</property>
-<property>
-  <name>metric.hadoop_rpc_metrics.getjobprofile_num_ops</name>
-  <value>get_job_profile_num_ops</value>
-</property>
-<property>
-  <name>metric.hadoop_rpc_metrics.getjobstatus_avg_time</name>
-  <value>get_job_status_avg_time</value>
-</property>
-<property>
-  <name>metric.hadoop_rpc_metrics.getjobstatus_num_ops</name>
-  <value>get_job_status_num_ops</value>
-</property>
-<property>
-  <name>metric.hadoop_rpc_metrics.getnewjobid_avg_time</name>
-  <value>get_new_job_id_avg_time</value>
-</property>
-<property>
-  <name>metric.hadoop_rpc_metrics.getnewjobid_num_ops</name>
-  <value>get_new_job_id_num_ops</value>
-</property>
-<property>
-  <name>metric.hadoop_rpc_metrics.getprotocolversion_avg_time</name>
-  <value>get_protocol_version_avg_time</value>
-</property>
-<property>
-  <name>metric.hadoop_rpc_metrics.getprotocolversion_num_ops</name>
-  <value>get_protocol_version_num_ops</value>
-</property>
-<property>
-  <name>metric.hadoop_rpc_metrics.getsystemdir_avg_time</name>
-  <value>get_system_dir_avg_time</value>
-</property>
-<property>
-  <name>metric.hadoop_rpc_metrics.getsystemdir_num_ops</name>
-  <value>get_system_dir_num_ops</value>
-</property>
-<property>
-  <name>metric.hadoop_rpc_metrics.gettaskcompletionevents_avg_time</name>
-  <value>get_task_completion_events_avg_time</value>
-</property>
-<property>
-  <name>metric.hadoop_rpc_metrics.gettaskcompletionevents_num_ops</name>
-  <value>get_task_completion_events_num_ops</value>
-</property>
-<property>
-  <name>metric.hadoop_rpc_metrics.gettaskdiagnostics_avg_time</name>
-  <value>get_task_diagnostics_avg_time</value>
-</property>
-<property>
-  <name>metric.hadoop_rpc_metrics.gettaskdiagnostics_num_ops</name>
-  <value>get_task_diagnostics_num_ops</value>
-</property>
-<property>
-  <name>metric.hadoop_rpc_metrics.heartbeat_avg_time</name>
-  <value>heartbeat_avg_time</value>
-</property>
-<property>
-  <name>metric.hadoop_rpc_metrics.heartbeat_num_ops</name>
-  <value>heartbeat_num_ops</value>
-</property>
-<property>
-  <name>metric.hadoop_rpc_metrics.submitjob_avg_time</name>
-  <value>submit_job_avg_time</value>
-</property>
-<property>
-  <name>metric.hadoop_rpc_metrics.submitjob_num_ops</name>
-  <value>submit_job_num_ops</value>
-</property>
-
-<!-- Hod Machine -->
-<property>
-  <name>metric.hodmachine.machine</name>
-  <value>host</value>
-</property>
-
-<property>
-  <name>metric.hodmachine.hodid</name>
-  <value>hodid</value>
-</property>
-
-<!-- Hod Job -->
-<property>
-  <name>metric.hodjob.hodid</name>
-  <value>HodID</value>
-</property>
-
-<property>
-  <name>metric.hodjob.userid</name>
-  <value>UserID</value>
-</property>
-
-<property>
-  <name>metric.hodjob.status</name>
-  <value>Status</value>
-</property>
-
-<property>
-  <name>metric.hodjob.timequeued</name>
-  <value>TimeQueued</value>
-</property>
-
-<property>
-  <name>metric.hodjob.starttime</name>
-  <value>StartTime</value>
-</property>
-
-<property>
-  <name>metric.hodjob.endtime</name>
-  <value>EndTime</value>
-</property>
-
-<property>
-  <name>metric.hodjob.numofmachines</name>
-  <value>NumOfMachines</value>
-</property>
-
-<!-- MSSRGraph -->
-<property>
-  <name>report.db.primary.key.mssrgraph</name>
-  <value>timestamp</value>
-</property>
-
-<property>
-  <name>metric.mssrgraph.jobid</name>
-  <value>job_id</value>
-</property>
-<property>
-  <name>metric.mssrgraph.count</name>
-  <value>count</value>
-</property>
-
-<property>
-  <name>metric.mssrgraph.type</name>
-  <value>type</value>
-</property>
-
-<!-- Map Reduce Job Counters -->
-<property>
-  <name>report.db.primary.key.mrjobcounters</name>
-  <value>timestamp</value>
-</property>
-
-<property>
-  <name>metric.mrjobcounters.file_systems_hdfs_bytes_read</name>
-  <value>hdfs_bytes_read</value>
-</property>
-
-<property>
-  <name>metric.mrjobcounters.file_systems_hdfs_bytes_written</name>
-  <value>hdfs_bytes_written</value>
-</property>
-<property>
-  <name>metric.mrjobcounters.file_systems_local_bytes_read</name>
-  <value>local_bytes_read</value>
-</property>
-<property>
-  <name>metric.mrjobcounters.file_systems_local_bytes_written</name>
-  <value>local_bytes_written</value>
-</property>
-<property>
-  <name>metric.mrjobcounters.job_counters__data-local_map_tasks</name>
-  <value>data_local_map_tasks</value>
-</property>
-<property>
-  <name>metric.mrjobcounters.job_counters__launched_map_tasks</name>
-  <value>launched_map_tasks</value>
-</property>
-<property>
-  <name>metric.mrjobcounters.job_counters__launched_reduce_tasks</name>
-  <value>launched_reduce_tasks</value>
-</property>
-<property>
-  <name>metric.mrjobcounters.jobid</name>
-  <value>job_id</value>
-</property>
-<property>
-  <name>metric.mrjobcounters.map-reduce_framework_combine_input_records</name>
-  <value>combine_input_records</value>
-</property>
-<property>
-  <name>metric.mrjobcounters.map-reduce_framework_combine_output_records</name>
-  <value>combine_output_records</value>
-</property>
-<property>
-  <name>metric.mrjobcounters.map-reduce_framework_map_input_bytes</name>
-  <value>map_input_bytes</value>
-</property>
-<property>
-  <name>metric.mrjobcounters.map-reduce_framework_map_output_bytes</name>
-  <value>map_output_bytes</value>
-</property>
-<property>
-  <name>metric.mrjobcounters.map-reduce_framework_map_input_records</name>
-  <value>map_input_records</value>
-</property>
-<property>
-  <name>metric.mrjobcounters.map-reduce_framework_map_output_records</name>
-  <value>map_output_records</value>
-</property>
-<property>
-  <name>metric.mrjobcounters.map-reduce_framework_reduce_input_groups</name>
-  <value>reduce_input_groups</value>
-</property>
-<property>
-  <name>metric.mrjobcounters.map-reduce_framework_reduce_input_records</name>
-  <value>reduce_input_records</value>
-</property>
-<property>
-  <name>metric.mrjobcounters.map-reduce_framework_reduce_output_records</name>
-  <value>reduce_output_records</value>
-</property>
-
-<!-- Database summarization intervals -->
-<property>
-  <name>consolidator.table.dfs_namenode</name>
-  <value>5,30,180,720</value>
-</property>
-
-<property>
-  <name>consolidator.table.dfs_datanode</name>
-  <value>5,30,180,720</value>
-</property>
-
-<property>
-  <name>consolidator.table.hadoop_rpc</name>
-  <value>5,30,180,720</value>
-</property>
-
-<property>
-  <name>consolidator.table.cluster_hadoop_rpc</name>
-  <value>5,30,180,720</value>
-</property>
-
-<property>
-  <name>consolidator.table.hadoop_mapred</name>
-  <value>5,30,180,720</value>
-</property>
-
-<property>
-  <name>consolidator.table.hadoop_jvm</name>
-  <value>5,30,180,720</value>
-</property>
-
-<property>
-  <name>consolidator.table.system_metrics</name>
-  <value>5,30,180,720</value>
-</property>
-
-<property>
-  <name>consolidator.table.dfs_throughput</name>
-  <value>5,30,180,720</value>
-</property>
-
-<property>
-  <name>consolidator.table.node_activity</name>
-  <value>5,30,180,720</value>
-</property>
-
-<property>
-  <name>consolidator.table.dfs_fsnamesystem</name>
-  <value>5,30,180,720</value>
-</property>
-
-<property>
-  <name>consolidator.table.disk</name>
-  <value>5,30,180,720</value>
-</property>
-
-<property>
-  <name>consolidator.table.cluster_disk</name>
-  <value>5,30,180,720</value>
-</property>
-
-<property>
-  <name>consolidator.table.cluster_system_metrics</name>
-  <value>5,30,180,720</value>
-</property>
-
-<property>
-  <name>consolidator.table.hod_job_digest</name>
-  <value>5,30,180,720</value>
-</property>
-
-<property>
-  <name>consolidator.table.hod_machine</name>
-  <value>5,30,180,720</value>
-</property>
-
-<property>
-  <name>consolidator.table.HodJob</name>
-  <value>5,30,180,720</value>
-</property>
-
-<property>
-  <name>consolidator.table.user_util</name>
-  <value>5,30,180,720</value>
-</property>
-
-</configuration>
-

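The metric.mrjobcounters.* entries above appear to map raw MapReduce counter names onto shorter database column names, and the consolidator.table.* properties all carry the same comma-separated list (5,30,180,720), labeled in the file only as "Database summarization intervals." As a minimal sketch of how such a Hadoop-style configuration could be consumed, the following Java fragment loads the (now removed) file and splits one of those lists; the conf/ path and the reading of each number as one summarization window are assumptions, since the units are not stated in the file.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;

public class ConsolidatorIntervals {
  public static void main(String[] args) {
    // Load only this resource, without Hadoop's default configuration files.
    Configuration conf = new Configuration(false);
    conf.addResource(new Path("conf/chukwa-demux-conf.xml"));  // assumed location

    // getStrings() splits the comma-separated value: "5,30,180,720" -> four entries.
    String[] intervals = conf.getStrings("consolidator.table.system_metrics");
    if (intervals == null) {
      System.err.println("consolidator.table.system_metrics is not set");
      return;
    }
    for (String interval : intervals) {
      // Presumably one aggregation pass per listed window; the units are an assumption.
      System.out.println("system_metrics summarization window: " + interval);
    }
  }
}
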
+ 0 - 0
src/contrib/chukwa/conf/nodeActivity.properties


+ 0 - 0
src/contrib/chukwa/conf/queueinfo.properties


+ 0 - 13
src/contrib/chukwa/conf/system-data-loader.properties

@@ -1,13 +0,0 @@
-log4j.rootLogger=INFO, R
-log4j.appender.R=org.apache.hadoop.chukwa.inputtools.log4j.ChukwaDailyRollingFileAppender
-log4j.appender.R.File=${CHUKWA_LOG_DIR}/${RECORD_TYPE}.log
-log4j.appender.R.recordType=${RECORD_TYPE}
-log4j.appender.R.layout=org.apache.log4j.PatternLayout
-log4j.appender.R.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
-
-log4j.appender.stdout=org.apache.log4j.ConsoleAppender
-log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
-log4j.appender.stdout.layout.ConversionPattern=%d{ISO8601} %p %t %c{1} - %m%n
-
-log4j.logger.org.apache.hadoop.chukwa.ikit.DataConfig=DEBUG, R
-

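The ${CHUKWA_LOG_DIR} and ${RECORD_TYPE} placeholders in the properties above are resolved by log4j 1.x against Java system properties, so a loader process has to define both before applying this file. A minimal sketch, with placeholder values, assuming the Chukwa appender class named in the file is on the classpath:

import org.apache.log4j.Logger;
import org.apache.log4j.PropertyConfigurator;

public class SystemDataLoaderLogging {
  public static void main(String[] args) {
    // log4j 1.x substitutes ${...} in properties files from system properties.
    System.setProperty("CHUKWA_LOG_DIR", "/tmp/chukwa/logs");   // placeholder path
    System.setProperty("RECORD_TYPE", "SystemMetrics");         // placeholder record type

    PropertyConfigurator.configure("conf/system-data-loader.properties");

    Logger log = Logger.getLogger(SystemDataLoaderLogging.class);
    // With the configuration above, this line is appended to
    // ${CHUKWA_LOG_DIR}/${RECORD_TYPE}.log, one record per line.
    log.info("sample record");
  }
}
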
+ 0 - 0
src/contrib/chukwa/conf/torque.properties


+ 0 - 0
src/contrib/chukwa/conf/util.properties


+ 0 - 19
src/contrib/chukwa/default.properties

@@ -1,19 +0,0 @@
-#   Licensed to the Apache Software Foundation (ASF) under one or more
-#   contributor license agreements.  See the NOTICE file distributed with
-#   this work for additional information regarding copyright ownership.
-#   The ASF licenses this file to You under the Apache License, Version 2.0
-#   (the "License"); you may not use this file except in compliance with
-#   the License.  You may obtain a copy of the License at
-#
-#       http://www.apache.org/licenses/LICENSE-2.0
-#
-#   Unless required by applicable law or agreed to in writing, software
-#   distributed under the License is distributed on an "AS IS" BASIS,
-#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#   See the License for the specific language governing permissions and
-#   limitations under the License.
-rpm.prefix=/usr/local
-rpm.conf.dir=/usr/local/chukwa/conf
-hadoop.conf=/usr/local/hadoop
-rpm.uid=chukwa
-rpm.gid=users

+ 0 - 106
src/contrib/chukwa/docs/README

@@ -1,106 +0,0 @@
-WHAT CHUKWA IS
-
-Chukwa is a data collection system being built to monitor and analyze large
-distributed systems. Chukwa is built on top of Hadoop, an open source 
-Map-Reduce and distributed filesystem implementation, and inherits Hadoop's
-scalability and robustness. Chukwa also includes a flexible and powerful
-toolkit for displaying monitoring and analysis results, in order to make the
-best use of this collected data.
-
-Chukwa is currently in early stages of implementation. It runs, but documentation
-is still sketchy, and many rough edges remain.
-
-Chukwa is being developed as an open source project by Yahoo!, inc.
-The Chukwa development team consists of: Jerome Boulon, Andy Konwinski, Runping Qi,
-Ari Rabkin, Eric Yang, and Mac Yang.
-  
-Questions should be addressed to Mac Yang: macyang@yahoo-inc.com
-
-
-RUNNING CHUKWA -- LOCAL QUICKSTART
-
-
-The Chukwa monitoring system has a number of components.   
-This section gives guidance on starting each of them on your local machine. 
-
-You should start the collector first, then the agent, and finally any adaptors.
-
-*  Compiling and installing Chukwa
-
-   - If Chukwa is in the hadoop contrib directory, you should be able to just
-     say ``ant'' in the project root directory.
-   
-   - Otherwise, 
-
-*  Configuring and starting the Collector
-
-   - Copy conf/chukwa-collector-conf.xml.template to conf/chukwa-collector-conf.xml
-      
-   - Edit the writer.hdfs.filesystem property to point to a real filesystem.
-   
-   - If you are running hadoop, this should be the path to the namenode.
-   
-   - If you are not running hadoop, you can just use a local path, of the form
-    file:///tmp/chukwa.
-    
-   - Copy conf/chukwa-env.sh.template to conf/chukwa-env.sh.  Set JAVA_HOME in that file.
-    
-   - In the chukwa root directory, say ``bash bin/jettyCollector.sh''
-
-*  Configuring and starting the Local Agent
- 
-  - Copy conf/chukwa-agent-conf.xml.template to conf/chukwa-agent-conf.xml
-  
-  - Copy conf/collectors.template to conf/collectors
-
-  - In the chukwa root directory, say ``bash bin/agent.sh''
-
-*  Starting Adaptors
-  The local agent speaks a simple text-based protocol, by default over port 9093.
-Suppose you want Chukwa to start tailing a file /path/to/file of type MyFileType 
-on localhost:
-
-  - Telnet to localhost 9093
-  
-  - Type [without quotation marks] "ADD CharFileTailerUTF8 MyFileType /path/to/file 0"
-
-  - Chukwa's internal type for the Namenode is NameNodeType, so to tail a namenode log,
-  type [without quotation marks] "ADD CharFileTailerUTF8 NameNodeType /path/to/nameNodeFile 0"
-  
-  - Type "list" -- you should see the adaptor you just started, listed as running. 
-  
-  - Type  "close" to break the connection.
-  
-  If you don't have telnet, you can get the same effect with netcat (``nc''), or with a small socket client such as the sketch that follows this README.
-    
-*  Configuring and starting the demux job
-
-  - Edit bin/chukwa-config.sh to match your system configuration
-  
-  - In the chukwa root directory, say ``bash bin/processSinkFiles.sh'' 
-
-*  Configuring and starting Database
-
-  - Download MySQL connector from http://dev.mysql.com/downloads/connector/j/5.1.html
-
-  - Copy mysql-connector-*.jar to Chukwa lib directory.
-
-  - Configure MySQL, and run ``mysql -u root dbname < database_create_table'' to install the new database schema from the Chukwa conf directory.
-  
-  - Configure JDBC driver name in conf/chukwa-env.sh: com.mysql.jdbc.Driver
-
-*  Configuring and starting HICC
-
-  - Download Apache Tomcat from http://tomcat.apache.org/download-60.cgi
-  
-  - Configure CHUKWA_HOME environment variable pointing to the Chukwa home directory.
-
-  - Copy hicc.war into Apache Tomcat webapps directory.
-
-  - Start up Tomcat.
-
-RUNNING CHUKWA -- NETWORKED
-
-Running Chukwa in a networked context is essentially similar to the single-machine
-deployment discussed above.  However, in a network context, you would also need to
-tell the local agent where the collector[s] live, by listing them in conf/collectors.

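The adaptor commands in the README above (ADD ..., list, close) travel over a plain text socket, so they can also be scripted. Below is a hypothetical Java client for that control protocol; the default port 9093 and the command strings come from the README, while the host, the adaptor arguments, and the single-line reply handling are assumptions for illustration.

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.net.Socket;
import java.nio.charset.StandardCharsets;

public class AgentControlClient {
  public static void main(String[] args) throws Exception {
    try (Socket socket = new Socket("localhost", 9093);  // agent's default control port
         PrintWriter out = new PrintWriter(socket.getOutputStream(), true);
         BufferedReader in = new BufferedReader(
             new InputStreamReader(socket.getInputStream(), StandardCharsets.UTF_8))) {

      // Start tailing a file, exactly as in the telnet example in the README.
      out.println("ADD CharFileTailerUTF8 MyFileType /path/to/file 0");

      // Ask for the running adaptors; reading a single line is a simplification,
      // since the README does not say how much the agent writes back.
      out.println("list");
      System.out.println("agent replied: " + in.readLine());

      // Drop the control connection.
      out.println("close");
    }
  }
}
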
BIN
src/contrib/chukwa/docs/paper/chukwa0.jpg


BIN
src/contrib/chukwa/docs/paper/chukwa1.jpg


+ 0 - 304
src/contrib/chukwa/docs/paper/chukwa_08.tex

@@ -1,304 +0,0 @@
-% TEMPLATE for Usenix papers, specifically to meet requirements of
-%  USENIX '05
-% originally a template for producing IEEE-format articles using LaTeX.
-%   written by Matthew Ward, CS Department, Worcester Polytechnic Institute.
-% adapted by David Beazley for his excellent SWIG paper in Proceedings,
-%   Tcl 96
-% turned into a smartass generic template by De Clarke, with thanks to
-%   both the above pioneers
-% use at your own risk.  Complaints to /dev/null.
-% make it two column with no page numbering, default is 10 point
-
-% Munged by Fred Douglis <douglis@research.att.com> 10/97 to separate
-% the .sty file from the LaTeX source template, so that people can
-% more easily include the .sty file into an existing document.  Also
-% changed to more closely follow the style guidelines as represented
-% by the Word sample file. 
-% This version uses the latex2e styles, not the very ancient 2.09 stuff.
-\documentclass[letterpaper,twocolumn,10pt]{article}
-\usepackage{usenix,epsfig,endnotes,url}
-\begin{document}
-
-%don't want date printed
-\date{}
-
-%make title bold and 14 pt font (Latex default is non-bold, 16 pt)
-\title{  Chukwa: A large-scale monitoring system}
-
-%for single author (just remove % characters)
-\author{
-{\rm Jerome Boulon}\\
-{\rm jboulon@yahoo-inc.com}\\
-Yahoo!, inc
-\and
-{\rm Andy Konwinski}\\
-{\rm andyk@cs.berkeley.edu}\\
-UC Berkeley
-\and
-{\rm Runping Qi}\\
-{\rm runping@yahoo-inc.com}\\
-Yahoo!, inc
-\and
-{\rm Ariel Rabkin}\\
-{\rm asrabkin@cs.berkeley.edu}\\
-UC Berkeley
-\and
-{\rm Eric Yang}\\
-{\rm eyang@yahoo-inc.com}\\
-Yahoo!, inc
-\and
-{\rm Mac Yang}\\
-{\rm macyang@yahoo-inc.com}\\
-Yahoo!, inc
-% copy the following lines to add more authors
-% \and
-% {\rm Name}\\
-%Name Institution
-} % end author
-
-\maketitle
-
-% Use the following at camera-ready time to suppress page numbers.
-% Comment it out when you first submit the paper for review.
-%\thispagestyle{empty}
-
-\begin{abstract}
-
-We describe the design and initial implementation of Chukwa, a data collection system for monitoring and analyzing large distributed systems. Chukwa is built on top of Hadoop, an open source distributed filesystem and MapReduce implementation, and inherits Hadoop's scalability and robustness. 
-%Chukwa demonstrates that a distributed filesystem and MapReduce implementation are general distributed computing primitives, that can be utilized effectively in 
-Chukwa also includes a flexible and powerful toolkit for displaying monitoring and analysis results, in order to make the best use of this collected data. 
-
-\end{abstract}
-
-\section{Introduction}
-\label{sec:introduction}
-
-Hadoop is a distributed filesystem and MapReduce \cite{mapreduce} implementation that is used pervasively at Yahoo! for a variety of critical business purposes.  Production clusters often include thousands of nodes. Large distributed systems such as Hadoop are fearsomely complex, and can fail in complicated and subtle ways. As a result, Hadoop is extensively instrumented.  A two-thousand node cluster configured for normal operation generates nearly half a terabyte of monitoring data per day, mostly application-level log files.
-
-%I think we need to state that this is not intended only for Hadoop related data, but for all applications that run on our cluster, with Hadoop as the first customer, with others following closely on Hadoop's heels (who else do we have lined up here?)
-This data is invaluable for debugging, performance measurement, and operational monitoring.  However, processing this data in real time at scale is a formidable challenge. A good monitoring system ought to scale out to very large deployments, and ought to handle crashes gracefully.  In Hadoop, only a handful of aggregate metrics, such as task completion rate and available disk space, are computed in real time. The vast bulk of the generated data is stored locally, and accessible via a per-node web interface.  Unfortunately, this mechanism does not facilitate programmatic analysis of the log data, nor the long term archiving of such data.
-
-To make full use of log data, users must first write ad-hoc log aggregation scripts to centralize the required data, and then build mechanisms to analyze the collected data.  Logs are periodically deleted, unless users take the initiative in storing them.
-% [?cite matei @ facebook who did exactly this for them?]
-
-We believe that our situation is typical, and that local storage of logging data is a common model for very large deployments.  To the extent that more sophisticated data management techniques are utilized, they are largely supported by ad-hoc proprietary solutions.  
-A well documented open source toolset for handling monitoring data thus solves a significant practical problem and provides a valuable reference point for future development in this area. 
-
-We did not aim to solve the problem of real-time monitoring for failure detection, which systems such as Ganglia already do well. Rather, we wanted a system that would process large volumes of data, in a timescale of minutes, not seconds, to detect more subtle conditions, and to aid in failure diagnosis. Human engineers do not generally react on a timescale of seconds, and so a processing delay of a few minutes is not a concern for us.
-
-%In particular, we believe that a distributed filesystem is a natural tool for data storage, and that MapReduce is a natural way of processing monitoring data. Leveraging this existing infrastructure drastically reduces the engineering difficulty inherent in large-scale monitoring.
-
-%One of our overriding considerations was to use existing components as much as possible.  In particular, we sought to leverage the engineering that has gone into Hadoop's distributed filesystem 
- 
- We are in the process of building a system, which we call Chukwa, to demonstrate that a practical large-scale monitoring system can readily be built atop this existing infrastructure.\footnote{In Hindu mythology, Chukwa is the turtle that holds up Maha-pudma, the elephant that holds up the world.  This name is especially appropriate for us, since the Hadoop mascot is a yellow elephant.}
- It uses Hadoop's distributed file system (HDFS) as its data store, and relies on MapReduce jobs to process the data. By leveraging these existing tools, Chukwa can scale to thousands of nodes in both collection and analysis capacities, while providing a standardized and familiar framework for processing the collected data. Many components of Chukwa are pluggable, allowing easy customization and enhancement.
- 
-The core components of Chukwa are largely complete, and we expect the system to enter production use at Yahoo! within the next few months. We have some initial operational experience, and preliminary performance metrics.    We begin by discussing our goals and requirements in some detail.  We then describe our design, explaining our motivation for various decisions. We next present some performance data, and conclude by offering some comparisons with related work.
-
-\section{Motivation and requirements} 
-
-We intend to use Chukwa to monitor multiple clusters of several thousand hosts, potentially generating several terabytes of data per day. Our goals in designing Chukwa were based on a survey of our cluster users' functional requirements and performance demands.
-
-We expect Chukwa to be used by four different (though overlapping) constituencies:  Hadoop users, cluster operators, cluster managers, and Hadoop developers.  These different groups have different functional requirements:
-
-\begin{itemize}
-\item \textbf{Hadoop Users} will ask how far along their jobs are, and what resources are available for future jobs. They need access to the logs and output from their jobs.  
-
-\item \textbf{Operators} need to be notified of hardware failures and performance anomalies. They need to be warned about resource shortages, such as storage exhaustion.  
-
-\item \textbf{Managers} need guidance in provisioning, and in apportioning costs. This means that they need tools for analyzing past usage by users and groups, and for projecting future demands.   They need access to figures of merit, such as average job waiting time.
-
-\item \textbf{Hadoop Developers} need information about the performance in operation, bottlenecks within Hadoop, common failure patterns, and so forth.
-\end{itemize}
-
-Fortunately these different demands boil down to a comparatively small set of technical requirements.  Chukwa must collect a large and open-ended set of time series metrics and logs, as well as  slowly changing dimensions such as machine configuration.  Stored data should be available promptly, and should remain available indefinitely. Efficient querying and analysis of large data volumes is essential.
-
-Our initial goal was to be able to monitor Hadoop clusters of 2000 nodes, outputting 5 to 6 MB of data per second, and to have collected data available for processing within ten minutes. Few operational Hadoop clusters today are larger than 2000 nodes, and thus that figure represents a reasonable initial operating capability.  In section 4 of this paper, we report the operational measurements that justify our target data rate.
-
-While having all data available immediately after collection might be desirable, it is not actually crucial. Systems such as Nagios or Ganglia work well for real-time monitoring of metrics such as CPU load.   Human administrators can take few useful actions on timescales shorter than a few minutes, and so low-latency execution of more complex processing is not a priority.
-
-
-\section{Architecture}
-
-\begin{figure*}
- \includegraphics[width=150mm]{chukwa_pipeline.png}
-\caption{The Chukwa Pipeline, showing how long data is retained at each stage.} 
-\end{figure*}
-
-\iffalse
-\begin{figure}
-  \includegraphics[width=75mm]{chukwa1.jpg}
-\caption{The agent side}
-\end{figure}
-\fi
-
-
-At the heart of any data collection system is a pipeline to pump data from where it is generated to where it is stored. The requirements at the endpoints dictate the design of the system in the middle. To meet its goals, Chukwa needs flexible, dynamically controllable data sources, and a high performance, large scale storage system. %Generated data ought to be available for processing shortly after generation
- It also needs a suitable framework for analyzing the large volumes of collected data.
-
-\subsection{Adaptors}
-
-Data sources need to be dynamically controllable because the particular data being collected from a machine changes over time, and varies from machine to machine. For example, as Hadoop tasks start and stop, different log files must be monitored. We might want to increase our collection rate if we detect anomalies.  And of course, it makes no sense to collect Hadoop metrics on an NFS server. 
-
-These dynamically controllable data sources are known in Chukwa as \textit{adaptors}, since they generally are wrapping some other data source, such as a file or a Unix command-line tool.  At present, Chukwa includes adaptors to collect Hadoop logs, application metrics, and system telemetry. We expect to write adaptors for tasks like counting recoverable disk read errors, retrieving causal logs from X-Trace \cite{xtrace}, and monitoring operating system and Java virtual machine state.
-
-%\subsection{Adaptors}
-% 
-%As a result, we bundle data collection into small dynamically loadable Adaptors which run within a local agent process on each machine. This process is left permanently running, and is restarted automatically if it crashes. The agent process is responsible for starting and stopping adaptors in response to external commands.  It also provides two crucial services to adaptors. First, it is responsible for forwarding chunks over HTTP to the collectors, where they are written to stable storage. Second, it is responsible for making regular checkpoints of adaptor state, and restarting adaptors at the appropriate position after a crash.  This checkpoint mechanism ensures that data is appropriately resent after a crash.
-
-
-\subsection{Storage}
-
-The scalability challenges in large-scale monitoring systems primarily concern the data storage and analysis components, since that is where data from multiple machines is brought together. We determined from the outset to rely on Hadoop's HDFS as our storage component. Hadoop HDFS installations can store petabytes of data, and support high throughput; 20 MB/sec for one writer is typical in operational deployments, with total cluster throughput routinely in excess of a gigabyte per second. HDFS also facilitates parallel processing of stored data with MapReduce.
-
-Unfortunately, HDFS is not designed for the sort of workloads associated with monitoring. HDFS aims to handle large files and high write rates from comparatively small numbers of writers. It is not designed for thousands of concurrent low-rate writers, and millions of small files. Worse, writes to a file are not visible to readers until the file is closed, and stable versions of HDFS do not allow closed files to be reopened for writing. As a result, some care must be taken in using HDFS to support continuous rather than batch processing.
- Much of the Chukwa design was driven by the need to reconcile our many sporadic data sources with HDFS's performance characteristics and semantics.
- 
-\subsection{Collectors and Agents}
-
-Chukwa resolves these conflicting demands by adding additional pipeline stages between the adaptors and the HDFS data store: \textit{collectors} and \textit{agents}.
-
-Rather than have each adaptor write directly to HDFS, data is sent across the network to a \textit{collector} process that does the HDFS writes.  Each collector receives data from several hundred hosts, and writes all this data to a single \textit{sink file}, consisting of chunks of data plus metadata describing each chunk's source and format. Periodically, collectors close their sink files, rename them to mark them available for processing, and resume writing a new file.  Data is sent to collectors over HTTP, since this allows us to write our collector as a Java servlet. This in turn lets us use standard Java servlet containers for connection management. This is in keeping with the Chukwa philosophy of leveraging existing infrastructure when possible. 
-
-Collectors thus drastically reduce the number of HDFS files generated by Chukwa, from one per machine or adaptor per unit time, to a handful per cluster.  The decision to put collectors between data sources and the data store has other benefits. Collectors hide the details of the HDFS file system in use, such as its Hadoop version, from the adaptors.  This is a significant aid to configuration.  It is especially helpful when using Chukwa to monitor a development cluster running a different version of Hadoop or when using Chukwa to monitor a non-Hadoop cluster.  
-
-The second of our intermediate stages, agents, is less fundamental to the design. They exist primarily to provide various services to adaptors, and thus to make adaptors easier to write. Agents are long-running processes on each machine being monitored by Chukwa.  Each agent process is restarted automatically if it crashes. The agent provides three chief services to adaptors. First, the agent is responsible for starting and stopping adaptors in response to external commands.
-Second, it is responsible for forwarding chunks over HTTP to the collectors, where they are written to stable storage. 
-Third, it is responsible for making regular checkpoints of adaptor state, and restarting adaptors at the appropriate position after a crash.  
-
-
-
-%FIXME: ruby/failmon
-
-%The output of an adaptor consists of chunks, each containing one or more semantically meaningful records.  These records, such as lines from a log file or batches of metrics, must be kept together.  This means that adaptors must parse files intensively enough to detect record boundaries, and they must correctly handle buffering.
-
-%These costs are offset by important advantages. Keeping records intact allows collectors to run filters or triggers against data streams, without having to buffer partial lines. In the presence of load balancing across collectors, these filters will still behave properly. Perhaps most importantly, this allows the map phase of the periodic MapReduce process to extract metadata from records and use this metadata as a sort key: a large performance win.  If records were split across collectors, this metadata could be inaccessible.
-
-%FIXME: say something here
-%After a crash, the local agent restarts each adaptor, passing sequence number of the last byte of their output that was successfully sent to a collector.  Adaptors that read from files can seek to that offset before starting.  Adaptors that output ephemeral data, such as CPU load, can simply begin numbering output from this point, to mask  any crash-induced discontinuity from downstream users. 
-
-%\subsection{Agent}
-
-%Adaptors run within a local agent process on each machine. This checkpoint mechanism ensures that data is appropriately resent after a crash.
-
-%We opted to put most of the recovery logic in the agents, rather than the collectors. This removes state from the collectors. We considered having agents check with collectors to find out what the last thing they sent was. However, each agent would have to check with each collector, resulting in an exorbitant number of connections if many agents fail and reboot simultaneously.
-
-
-\subsection{Demux and archiving}
-
-A pair of MapReduce jobs runs every few minutes, taking all the available sink files as input.  The first job simply archives all the collected data, without processing or interpreting it.  The second job parses out structured data from some of the logs, and loads this structured data into a data store.  
-
-% For performance reasons, we do record parsing in the Map phase, and extract key-value pairs. Example records include job history reports, task durations, and so forth. We then use the shuffle and sort phases of the Reduce to organize records semantically by application timestamp and type. This forced us to design our adaptors to have adaptors output meaningful records, and not arbitrary chunks of streams. %% FIXME: recast last sentence
-
- These datastores are also pluggable. For now, we use HDFS files, one file per cluster, per data type, and time period.  So for instance there would be one file for all of a particular cluster's datanode logs, for the period from noon to 1pm on a given day.   This is only an interim solution, and we are evaluating various more suitable data stores, with support for structured queries.  Hive, an HDFS-backed data warehouse, might also be a good fit here \cite{hive}. 
- % Hive supports a subset of SQL for queries, which is particularly appealing for the context of ad-hoc analytics. 
- Column-oriented databases such as HBase and Hypertable would also be sensible options.  For small deployments, a local relational database would be suitable.
-
-%This de-multiplexing is the phase of Chukwa processing with the highest latency. Hadoop TaskTrackers only poll for work every few seconds, and therefore map-reduce jobs have an inherent latency larger than that.  Unfortunately, we do need this de-multiplexing stage, since HDFS, and structured stores built on top of it, perform much better for large writes than for small ones.
-
-Data stored in HDFS in a structured format can be processed straightforwardly with MapReduce jobs. We envision a library of ``canned'' MapReduce jobs for tasks like finding common failure modes, correlating events in the logs with slowdowns, discovering flakey machines, and so forth.  Since Chukwa data is split into different files based on content, these jobs take as input only a small fraction of the total data volume, and therefore can run relatively quickly.  Most structured storage systems, including Hive and Hypertable, include their own query interfaces. We expect that these interfaces will be used by users who want to do simple ad-hoc queries over stored Chukwa data, with MapReduce being reserved for more complex processing.
-
-\section{Data Analysis and Display}
-
-Collected data is only as useful as the analysis that can be done on it.  To ease analysis of collected data, we've built a flexible, configurable, ``portal-style'' web interface to Chukwa, termed the Hadoop Infrastructure Care Center (HICC). A configurable interface is not simply a frill --- it is necessary, since different users have very different data analysis needs.
-
-\begin{figure}
-% \includegraphics[width=75mm]{widget.png}
-%\caption{The HICC widget catalog} 
-\includegraphics[width=75mm]{hicc_in_action2.png}
-\caption{HICC displaying some DataNode metrics}
-\end{figure}
-
-
-In practice, a single individual often fulfills more than one of these roles, or some portion of a role. As a result, there is a compelling need to allow individuals to mix and match different components. We chose to do this by bundling each query, or family of queries, into a widget. HICC users can assemble their HICC workspace by selecting widgets from a catalog, in exactly the way that they can customize their personal Yahoo! or Google portal pages.
-
-Some of these components will display the results of canned map-reduce jobs run against data in  Chukwa storage.  Others will perform on-the-fly queries against SQL databases.    Still others might display telemetry collected with Ganglia, or report on recently opened failure tickets.  
-
-% HICC stores several kinds of widget state.  There is a global context, through which different widgets can ``collaborate'' and share data. There is a dashboard view, which stores information about the user's preferred layout, and display settings.  There is a widget descriptor file, for storing widget configuration and parameters.   
-
-%HICC is not intended for exploratory, ad-hoc queries. For that, we expect to rely on the query interface of our structured data store.  Since this store is not yet available, we have been using a MySQL database.
-
-
-\section{Evaluation}
-
-%Chukwa is currently in development, and we have not yet been able to do large-scale tests.  However, we have reason to be confident that Chukwa will comfortably meet our performance goals.
-
-Using logs from a production cluster at Yahoo!, we found that a 2000-node production cluster would generate around 5.5 MB of data per second.  Of this, the vast bulk (more than 95\%) was task tracker logs.  Metrics data accounted for more than half the remainder, with Namenode, HDFS datanode, and JobTracker logs accounting for the rest. This data rate is small enough that Chukwa should impose only very modest overhead on datacenter networks. 
-
-We conducted a number of small experiments to verify that Chukwa could handle this load.   All tests were run on an internal development cluster at Yahoo.  Machines had four 2.8 GHz Xeon processors, four IDE disks, and 3 GB of RAM, and ran Linux, with a 2.6.9 kernel.  %kernel version 2.6.9-55
-There are two potential bottlenecks in Chukwa that we evaluated in detail: the collector and the map-reduce job.  At present, collector throughput is more than adequate, and the demux job is the limiting phase in processing.
-
-To measure collector performance, we ran Chukwa on a 400 node test cluster. We configured nodes in this cluster to report data at many times the normal operational rate, emulating a much larger cluster.  In this configuration, the test cluster generated 14.4 megabytes of monitoring data per second.  A single collector was able to keep up with this data volume, and write it to HDFS; in a 30 minute test run, machine utilization never rose much above 50\%.  At this rate, we are bumping into the single-writer throughput limits imposed by HDFS, rather than any Chukwa-specific limits. Higher Chukwa bandwidth could be achieved by simply adding more writers.
-
-%The scalability limits we observed in more intensive experiments were caused by thread limits in Jetty, the Servlet container we are using at present. These limits can likely be overcome by more careful configuration. However, 7.2 MB/sec significantly exceeds both our performance goals and the rate at which we can process incoming data, so we have yet to perform this optimization. 
-
-At present, the rate-limiting phase of Chukwa is the Demux job. Using five worker nodes, our MapReduce job can process two gigabytes of metrics data in around three and a half minutes. We conducted five trials on the same 2 GB of test data.  Completion times ranged from 3:25 minutes to 3:34, with a mean of 3:30.  This means that we can process six minutes' worth of incoming data in three and a half minutes, thus keeping up with the incoming data flow and achieving our ten minute target latency.  
-Optimizing MapReduce jobs is fairly routine engineering at this point, and we believe that significant gains can be achieved here. 
-
-These results show that Chukwa can maintain latencies well under our ten minute target, while imposing very modest overheads on the cluster: five Chukwa nodes are only 0.25\% of our notional 2000-node cluster. We expect to be able to maintain these latency targets as we scale up the number of nodes being monitored.  Ramping up the size of MapReduce jobs is routine, and the engineering issues are well understood. Even for monitoring hundreds of thousands of nodes, Chukwa's data volumes would be significantly smaller than those seen in our production web indexing clusters.
-
-
-\section{Related Work}
-%\label{sec:related}
-
-Chukwa represents a design point in between two existing classes of systems: log collection frameworks on the one hand, and network management systems on the other.  Chukwa intends to combine the abundance of data display tools of existing NMS systems, with the high throughput and robustness expected of log collection frameworks.
-
-The syslog protocol supported streaming logs across the network as long ago as the late 1980s.  However, syslog had serious defects: no clear solution to the discovery, load balancing, or failure handling problems.  Facebook's Scribe \cite{scribe} system apparently solves some of these problems, but unfortunately, no details of Scribe have been published.
-
-Chukwa has some similarity with network monitoring systems such as Nagios, Ganglia, or Tivoli Monitoring \cite{Ganglia, Nagios, tivmon}. The three systems differ in emphasis, but have important commonalities.  All are capable of collecting and storing substantial volumes of metrics data. All include tools for displaying this data.  Nagios and Tivoli monitoring have centralized architectures, while Ganglia is decentralized.  Ganglia, unfortunately, is heavily adapted towards numeric time-series data, and provides minimal support for the sort of complex text-processing necessary for our applications.
-
-Chukwa, however, differs in crucial respects from these current systems. Today's monitoring systems are focused primarily on collection, with storage being a secondary priority.  Chukwa is designed for far higher data rates; metrics data, which is essentially all that Ganglia and Nagios are used to collect, is only a few percent of the data we will capture in operational settings. 
-
-With hundreds of gigabytes of data being collected per day, processing the stored data becomes a key bottleneck.  Chukwa's design was optimized precisely for storage and batch processing of collected data.  While MapReduce is routinely used at these scales, no currently available monitoring system makes provision for large-scale data intensive processing. 
-
-
-\section{Conclusion}
-%\label{sec:conclusion}
-
-Chukwa demonstrates that a high performance distributed monitoring system can readily be built atop existing distributed data collection frameworks. The Hadoop distributed file system supports petabytes of stored data and hundreds of megabytes per second of write throughput, enough for even very demanding monitoring applications. MapReduce provides a suitable framework for organizing and analyzing these data volumes.
-
-% Chukwa provides flexible and powerful tools for analyzing and displaying collected monitoring data.  We expect Chukwa to be very useful in monitoring and managing large system deployments.
-
-Building Chukwa on top of Hadoop resulted in a few design quirks, and a modest latency penalty.  However, it greatly simplified implementation, and leverages the substantial amount of work going into Hadoop.  Hadoop 0.19, which will be released within a few months, should significantly improve the performance of short-running Map tasks, which will allow us to efficiently operate Chukwa on short timescales.
-
-%\iffalse
-%\subsection{Future Work}
-%\fi
-
-%\section{Acknowledgments}
-
-\addcontentsline{toc}{section}{References}
-\begin{thebibliography}{99}
-
-\bibitem{mapreduce}
-Jeffrey Dean and Sanjay Ghemawat. ``MapReduce: Simplified Data Processing on Large Clusters.'' \newblock In \textit{Communications of the ACM}, Volume 51, Issue 1, pp. 107-113, 2008.
-\bibitem{Ganglia}
-Matthew L. Massie, Brent N. Chun, and David E. Culler.  ``The Ganglia Distributed Monitoring System: Design, Implementation, and Experience''.
-\newblock In \textit{Parallel Computing} Volume 30, Issue 7, pp 817-840, 2004.
-\bibitem{Nagios}
-http://www.nagios.org/
-\bibitem{tivmon}
-IBM Tivoli Monitoring.
-\newblock Available online: \texttt{http://www.ibm.com/ software/ tivoli/ products/ monitor/}
-\bibitem{hive}
-Joydeep Sen Sarma.  ``Hive as a contrib project''
-\newblock Available online: https://issues.apache.org/jira/browse/HADOOP-3601
-\bibitem{xtrace}
-Rodrigo Fonseca, George Porter, Randy H. Katz, Scott Shenker, and Ion Stoica.  X-Trace: A Pervasive Network Tracing Framework.
-\newblock In \textit{4th USENIX Symposium on Networked Systems Design \& Implementation (NSDI'07)}, Cambridge, MA, USA, April 2007.
-\bibitem{scribe}
-Scribe logfile aggregation system described by Facebook's Jeff Hammerbacher
-\url{https://issues.apache.org/jira/browse/HADOOP-2206?focusedCommentId=12542775#action_12542775}
-
-\end{thebibliography}
-
-\end{document}
-
-%		TODO:
-%	Maybe more depth in related work
-%	More performance numbers
-%	Mention "work in progress" somewhere up front?
-%	Better screen shot for HICC
-
-
-

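The architecture sections of the paper above describe adaptors as restartable data sources that emit chunks (batches of whole records plus metadata about their origin), with the agent checkpointing how far each adaptor has gotten so it can be resumed after a crash. The toy model below illustrates only that chunk-and-offset idea; it is a hypothetical sketch, not Chukwa's actual adaptor API, and every name in it is invented.

import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;

public class AdaptorSketch {
  /** A chunk: one or more whole records plus metadata about where they came from. */
  static final class Chunk {
    final String dataType;
    final String source;
    final long startOffset;
    final byte[] records;
    Chunk(String dataType, String source, long startOffset, byte[] records) {
      this.dataType = dataType;
      this.source = source;
      this.startOffset = startOffset;
      this.records = records;
    }
  }

  /** Hypothetical adaptor contract: emit chunks and report an offset the agent can checkpoint. */
  interface Adaptor {
    List<Chunk> poll();      // newly available chunks, possibly empty
    long committedOffset();  // byte offset to record in the agent's checkpoint
  }

  /** Toy "tailer" over an in-memory array of lines; offset numbering starts at resumeOffset. */
  static final class LineTailer implements Adaptor {
    private final String dataType;
    private final String[] lines;
    private int next;
    private long offset;
    LineTailer(String dataType, String[] lines, long resumeOffset) {
      this.dataType = dataType;
      this.lines = lines;
      this.offset = resumeOffset;
    }
    public List<Chunk> poll() {
      List<Chunk> out = new ArrayList<>();
      if (next < lines.length) {
        byte[] record = (lines[next++] + "\n").getBytes(StandardCharsets.UTF_8);
        out.add(new Chunk(dataType, "localhost", offset, record));
        offset += record.length;  // the agent would persist this value periodically
      }
      return out;
    }
    public long committedOffset() {
      return offset;
    }
  }

  public static void main(String[] args) {
    Adaptor tailer = new LineTailer("MyFileType", new String[] {"record one", "record two"}, 0L);
    for (int i = 0; i < 3; i++) {
      for (Chunk c : tailer.poll()) {
        System.out.print(c.dataType + "@" + c.startOffset + ": "
            + new String(c.records, StandardCharsets.UTF_8));
      }
    }
    System.out.println("checkpoint offset = " + tailer.committedOffset());
  }
}
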
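As a quick back-of-the-envelope check, the figures quoted in the paper's evaluation section are mutually consistent (in LaTeX notation, using only numbers stated in the text):

$5.5\ \mathrm{MB/s} \times 86{,}400\ \mathrm{s/day} \approx 475\ \mathrm{GB/day}$, i.e. the ``nearly half a terabyte'' per day cited for a 2000-node cluster, and

$5.5\ \mathrm{MB/s} \times 360\ \mathrm{s} \approx 1.98\ \mathrm{GB}$, i.e. roughly the two gigabytes the Demux job is reported to process per six-minute window.
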
BIN
src/contrib/chukwa/docs/paper/hicc_in_action2.png


+ 0 - 94
src/contrib/chukwa/docs/paper/usenix.sty

@@ -1,94 +0,0 @@
-% usenix.sty - to be used with latex2e for USENIX.
-% To use this style file, look at the template usenix_template.tex
-%
-% $Id: usenix.sty,v 1.2 2005/02/16 22:30:47 maniatis Exp $
-%
-% The following definitions are modifications of standard article.sty
-% definitions, arranged to do a better job of matching the USENIX
-% guidelines.
-% It will automatically select two-column mode and the Times-Roman
-% font.
-
-%
-% USENIX papers are two-column.
-% Times-Roman font is nice if you can get it (requires NFSS,
-% which is in latex2e.
-
-\if@twocolumn\else\input twocolumn.sty\fi
-\usepackage{times}
-
-%
-% USENIX wants margins of: 1" sides, 1" bottom, and 1" top.
-% 0.25" gutter between columns.
-% Gives active areas of 6.5" x 9"
-%
-\setlength{\textheight}{9.0in}
-\setlength{\columnsep}{0.25in}
-\setlength{\textwidth}{6.50in}
-
-\setlength{\topmargin}{0.0in}
-
-\setlength{\headheight}{0.0in}
-
-\setlength{\headsep}{0.0in}
-
-% Usenix wants no page numbers for camera-ready papers, so that they can
-% number them themselves.  But submitted papers should have page numbers
-% for the reviewers' convenience.
-% 
-%
-% \pagestyle{empty}
-
-%
-% Usenix titles are in 14-point bold type, with no date, and with no
-% change in the empty page headers.  The whole author section is 12 point
-% italic--- you must use {\rm } around the actual author names to get
-% them in roman.
-%
-\def\maketitle{\par
- \begingroup
-   \renewcommand\thefootnote{\fnsymbol{footnote}}%
-   \def\@makefnmark{\hbox to\z@{$\m@th^{\@thefnmark}$\hss}}%
-    \long\def\@makefntext##1{\parindent 1em\noindent
-            \hbox to1.8em{\hss$\m@th^{\@thefnmark}$}##1}%
-   \if@twocolumn
-     \twocolumn[\@maketitle]%
-     \else \newpage
-     \global\@topnum\z@
-     \@maketitle \fi\@thanks
- \endgroup
- \setcounter{footnote}{0}%
- \let\maketitle\relax
- \let\@maketitle\relax
- \gdef\@thanks{}\gdef\@author{}\gdef\@title{}\let\thanks\relax}
-
-\def\@maketitle{\newpage
- \vbox to 2.5in{
- \vspace*{\fill}
- \vskip 2em
- \begin{center}%
-  {\Large\bf \@title \par}%
-  \vskip 0.375in minus 0.300in
-  {\large\it
-   \lineskip .5em
-   \begin{tabular}[t]{c}\@author
-   \end{tabular}\par}%
- \end{center}%
- \par
- \vspace*{\fill}
-% \vskip 1.5em
- }
-}
-
-%
-% The abstract is preceded by a 12-pt bold centered heading
-\def\abstract{\begin{center}%
-{\large\bf \abstractname\vspace{-.5em}\vspace{\z@}}%
-\end{center}}
-\def\endabstract{}
-
-%
-% Main section titles are 12-pt bold.  Others can be same or smaller.
-%
-\def\section{\@startsection {section}{1}{\z@}{-3.5ex plus-1ex minus
-    -.2ex}{2.3ex plus.2ex}{\reset@font\large\bf}}

BIN
src/contrib/chukwa/docs/paper/widget.png


BIN
src/contrib/chukwa/hadoopjars/hadoop-0.18.0-core.jar


+ 0 - 97
src/contrib/chukwa/ivy.xml

@@ -1,97 +0,0 @@
-<?xml version="1.0" ?>
-<ivy-module version="1.0">
-  <info organisation="org.apache.hadoop" module="${ant.project.name}">
-    <license name="Apache 2.0"/>
-    <ivyauthor name="Apache Hadoop Team" url="http://hadoop.apache.org"/>
-    <description>
-        Apache Hadoop Chukwa
-    </description>
-  </info>
-  <configurations defaultconfmapping="default">
-    <!--these match the Maven configurations-->
-    <conf name="default" extends="master,runtime"/>
-    <conf name="master" description="contains the artifact but no dependencies"/>
-    <conf name="runtime" description="runtime but not the artifact" />
-    <!--Private configurations. -->
-
-    <conf name="common" visibility="private" 
-      extends="jetty"
-      description="artifacts needed to compile/test the application"/>
-    <conf name="jetty" visibility="private" />
-    
-  </configurations>
-
-  <publications>
-    <!--get the artifact from our module name-->
-    <artifact conf="master"/>
-  </publications>
-  <dependencies>
-   <dependency org="commons-fileupload"
-      name="commons-fileupload"
-      rev="${commons-fileupload.version}"
-      conf="common->default"/>
-   <dependency org="commons-httpclient"
-      name="commons-httpclient"
-      rev="${commons-httpclient.version}"
-      conf="common->master"/>  
-    <dependency org="commons-io"
-      name="commons-io"
-      rev="${commons-io.version}"
-      conf="common->default"/>  
-    <dependency org="commons-logging"
-      name="commons-logging"
-      rev="${commons-logging.version}"
-      conf="common->default"/>
-    <dependency org="commons-codec"
-      name="commons-codec"
-      rev="${commons-codec.version}"
-      conf="common->master"/>   
-  <dependency org="commons-logging"
-      name="commons-logging-api"
-      rev="${commons-logging-api.version}"
-      conf="common->master"/>   
-   <dependency org="commons-net"
-      name="commons-net"
-      rev="${commons-net.version}"
-      conf="common->master"/> 
-    <dependency org="tomcat"
-      name="servlet-api"
-      rev="${servlet-api.version}"
-      conf="common->default"/>   
-    <dependency org="org.mortbay.jetty"
-      name="jetty"
-      rev="${jetty.version}"
-      conf="common->default"/>   
-<!-- Not able to figure out the version - resolving locally  -->
-<!--
-    <dependency org="org.json"
-      name="json"
-      rev="${json.version}"
-      conf="common->default"/>  
-    <dependency org="org.apache.tomcat"
-      name="jsp-api"
-      rev="${jsp-api.version}"
-      conf="common->default"/>
-    <dependency org="taglibs"
-      name="application"
-      rev="${taglibs.version}"
-      conf="common->default"/> -->
-    <dependency org="org.mortbay.jetty"
-      name="jetty-util"
-      rev="${jetty-util.version}"
-      conf="common->default"/>
-    <dependency org="junit"
-      name="junit"
-      rev="${junit.version}"
-      conf="common->default"/> 
-    <dependency org="tomcat"
-      name="servlet"
-      rev="${servlet.version}"
-      conf="common->default"/>   
-    <dependency org="log4j"
-      name="log4j"
-      rev="${log4j.version}"
-      conf="common->master"/>
-    </dependencies>
-  
-</ivy-module>

+ 0 - 63
src/contrib/chukwa/ivy/ivysettings.xml

@@ -1,63 +0,0 @@
-<ivysettings>
-  <!--
-  see http://www.jayasoft.org/ivy/doc/configuration
-  -->
-  <!-- you can override this property to use mirrors
-          http://repo1.maven.org/maven2/
-          http://mirrors.dotsrc.org/maven2
-          http://ftp.ggi-project.org/pub/packages/maven2
-          http://mirrors.sunsite.dk/maven2
-          http://public.planetmirror.com/pub/maven2
-          http://ibiblio.lsu.edu/main/pub/packages/maven2
-          http://www.ibiblio.net/pub/packages/maven2
-  -->
-  <property name="repo.maven.org"
-    value="http://repo1.maven.org/maven2/"
-    override="false"/>
-  <property name="snapshot.apache.org"
-    value="http://people.apache.org/repo/m2-snapshot-repository/"
-    override="false"/>
-  <property name="maven2.pattern"
-    value="[organisation]/[module]/[revision]/[module]-[revision]"/>
-  <property name="maven2.pattern.ext"
-    value="${maven2.pattern}.[ext]"/>
-  <!-- pull in the local repository -->
-  <include url="${ivy.default.conf.dir}/ivyconf-local.xml"/>
-  <settings defaultResolver="default"/>
-  <resolvers>
-    <ibiblio name="maven2"
-      root="${repo.maven.org}"
-      pattern="${maven2.pattern.ext}"
-      m2compatible="true"
-      />
-    <ibiblio name="apache-snapshot"
-      root="${snapshot.apache.org}"
-      pattern="${maven2.pattern.ext}"
-      m2compatible="true"
-      />
-    <chain name="default" dual="true">
-      <resolver ref="local"/>
-      <resolver ref="maven2"/>
-    </chain>
-    <chain name="internal">
-      <resolver ref="local"/>
-    </chain>
-    <chain name="external">
-      <resolver ref="maven2"/>
-    </chain>
-    <chain name="external-and-snapshots">
-      <resolver ref="maven2"/>
-      <resolver ref="apache-snapshot"/>
-    </chain>
-  </resolvers>
-  <modules>
-    <!--
-    This forces a requirement for other hadoop-artifacts to be built locally
-    rather than look for them online.
-
-    -->
-    <module organisation="org.apache.hadoop" name=".*" resolver="internal"/>
-    <!--until commons cli is external, we need to pull it in from the snapshot repository -if present -->
-    <module organisation="org.apache.commons" name=".*" resolver="external-and-snapshots"/>
-  </modules>
-</ivysettings>

+ 0 - 32
src/contrib/chukwa/ivy/libraries.properties

@@ -1,32 +0,0 @@
-#This properties file lists the versions of the various artifacts used by chukwa.
-
-
-
-hadoop.version=0.20.0-dev
-
-
-#These are the versions of our dependencies (in alphabetical order)
-commons-fileupload.version=1.2
-commons-httpclient.version=3.0.1
-commons-logging.version=1.0.4
-commons-io.version=1.4
-commons-codec.version=1.3
-commons-net.version=1.4.1
-commons-logging-api.version=1.0.4
-
-# couldn't make out the version of jsp-api
-#jsp-api.version=
-ivy.version=2.0.0-rc2
-# chukwa uses a different version of jetty than Hadoop
-jetty.version=6.1.11
-jetty-util.version=6.1.11
-
-junit.version=3.8.1
-
-log4j.version=1.2.13
-
-servlet.version=4.0.6
-servlet-api.version=5.5.12
-
-# Not able to figure out the taglibs version 
-#taglibs.version=

+ 0 - 9
src/contrib/chukwa/lib/json-LICENSE.txt

@@ -1,9 +0,0 @@
-Copyright (c) 2002 JSON.org
-
-Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
-
-The Software shall be used for Good, not Evil.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 

+ 0 - 563
src/contrib/chukwa/lib/json-README.txt

@@ -1,563 +0,0 @@
-
-
-
-
-
-
-Network Working Group                                       D. Crockford
-Request for Comments: 4627                                      JSON.org
-Category: Informational                                        July 2006
-
-
- The application/json Media Type for JavaScript Object Notation (JSON)
-
-Status of This Memo
-
-   This memo provides information for the Internet community.  It does
-   not specify an Internet standard of any kind.  Distribution of this
-   memo is unlimited.
-
-Copyright Notice
-
-   Copyright (C) The Internet Society (2006).
-
-Abstract
-
-   JavaScript Object Notation (JSON) is a lightweight, text-based,
-   language-independent data interchange format.  It was derived from
-   the ECMAScript Programming Language Standard.  JSON defines a small
-   set of formatting rules for the portable representation of structured
-   data.
-
-1.  Introduction
-
-   JavaScript Object Notation (JSON) is a text format for the
-   serialization of structured data.  It is derived from the object
-   literals of JavaScript, as defined in the ECMAScript Programming
-   Language Standard, Third Edition [ECMA].
-
-   JSON can represent four primitive types (strings, numbers, booleans,
-   and null) and two structured types (objects and arrays).
-
-   A string is a sequence of zero or more Unicode characters [UNICODE].
-
-   An object is an unordered collection of zero or more name/value
-   pairs, where a name is a string and a value is a string, number,
-   boolean, null, object, or array.
-
-   An array is an ordered sequence of zero or more values.
-
-   The terms "object" and "array" come from the conventions of
-   JavaScript.
-
-   JSON's design goals were for it to be minimal, portable, textual, and
-   a subset of JavaScript.
-
-1.1.  Conventions Used in This Document
-
-   The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT",
-   "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this
-   document are to be interpreted as described in [RFC2119].
-
-   The grammatical rules in this document are to be interpreted as
-   described in [RFC4234].
-
-2.  JSON Grammar
-
-   A JSON text is a sequence of tokens.  The set of tokens includes six
-   structural characters, strings, numbers, and three literal names.
-
-   A JSON text is a serialized object or array.
-
-      JSON-text = object / array
-
-   These are the six structural characters:
-
-      begin-array     = ws %x5B ws  ; [ left square bracket
-
-      begin-object    = ws %x7B ws  ; { left curly bracket
-
-      end-array       = ws %x5D ws  ; ] right square bracket
-
-      end-object      = ws %x7D ws  ; } right curly bracket
-
-      name-separator  = ws %x3A ws  ; : colon
-
-      value-separator = ws %x2C ws  ; , comma
-
-   Insignificant whitespace is allowed before or after any of the six
-   structural characters.
-
-      ws = *(
-                %x20 /              ; Space
-                %x09 /              ; Horizontal tab
-                %x0A /              ; Line feed or New line
-                %x0D                ; Carriage return
-            )
-
-2.1.  Values
-
-   A JSON value MUST be an object, array, number, or string, or one of
-   the following three literal names:
-
-      false null true
-
-   The literal names MUST be lowercase.  No other literal names are
-   allowed.
-
-         value = false / null / true / object / array / number / string
-
-         false = %x66.61.6c.73.65   ; false
-
-         null  = %x6e.75.6c.6c      ; null
-
-         true  = %x74.72.75.65      ; true
-
-2.2.  Objects
-
-   An object structure is represented as a pair of curly brackets
-   surrounding zero or more name/value pairs (or members).  A name is a
-   string.  A single colon comes after each name, separating the name
-   from the value.  A single comma separates a value from a following
-   name.  The names within an object SHOULD be unique.
-
-      object = begin-object [ member *( value-separator member ) ]
-      end-object
-
-      member = string name-separator value
-
-2.3.  Arrays
-
-   An array structure is represented as square brackets surrounding zero
-   or more values (or elements).  Elements are separated by commas.
-
-      array = begin-array [ value *( value-separator value ) ] end-array
-
-2.4.  Numbers
-
-   The representation of numbers is similar to that used in most
-   programming languages.  A number contains an integer component that
-   may be prefixed with an optional minus sign, which may be followed by
-   a fraction part and/or an exponent part.
-
-   Octal and hex forms are not allowed.  Leading zeros are not allowed.
-
-   A fraction part is a decimal point followed by one or more digits.
-
-   An exponent part begins with the letter E in upper or lowercase,
-   which may be followed by a plus or minus sign.  The E and optional
-   sign are followed by one or more digits.
-
-   Numeric values that cannot be represented as sequences of digits
-   (such as Infinity and NaN) are not permitted.
-
-         number = [ minus ] int [ frac ] [ exp ]
-
-         decimal-point = %x2E       ; .
-
-         digit1-9 = %x31-39         ; 1-9
-
-         e = %x65 / %x45            ; e E
-
-         exp = e [ minus / plus ] 1*DIGIT
-
-         frac = decimal-point 1*DIGIT
-
-         int = zero / ( digit1-9 *DIGIT )
-
-         minus = %x2D               ; -
-
-         plus = %x2B                ; +
-
-         zero = %x30                ; 0
-
-2.5.  Strings
-
-   The representation of strings is similar to conventions used in the C
-   family of programming languages.  A string begins and ends with
-   quotation marks.  All Unicode characters may be placed within the
-   quotation marks except for the characters that must be escaped:
-   quotation mark, reverse solidus, and the control characters (U+0000
-   through U+001F).
-
-   Any character may be escaped.  If the character is in the Basic
-   Multilingual Plane (U+0000 through U+FFFF), then it may be
-   represented as a six-character sequence: a reverse solidus, followed
-   by the lowercase letter u, followed by four hexadecimal digits that
-   encode the character's code point.  The hexadecimal letters A though
-   F can be upper or lowercase.  So, for example, a string containing
-   only a single reverse solidus character may be represented as
-   "\u005C".
-
-   Alternatively, there are two-character sequence escape
-   representations of some popular characters.  So, for example, a
-   string containing only a single reverse solidus character may be
-   represented more compactly as "\\".
-
-   To escape an extended character that is not in the Basic Multilingual
-   Plane, the character is represented as a twelve-character sequence,
-   encoding the UTF-16 surrogate pair.  So, for example, a string
-   containing only the G clef character (U+1D11E) may be represented as
-   "\uD834\uDD1E".
-
-         string = quotation-mark *char quotation-mark
-
-         char = unescaped /
-                escape (
-                    %x22 /          ; "    quotation mark  U+0022
-                    %x5C /          ; \    reverse solidus U+005C
-                    %x2F /          ; /    solidus         U+002F
-                    %x62 /          ; b    backspace       U+0008
-                    %x66 /          ; f    form feed       U+000C
-                    %x6E /          ; n    line feed       U+000A
-                    %x72 /          ; r    carriage return U+000D
-                    %x74 /          ; t    tab             U+0009
-                    %x75 4HEXDIG )  ; uXXXX                U+XXXX
-
-         escape = %x5C              ; \
-
-         quotation-mark = %x22      ; "
-
-         unescaped = %x20-21 / %x23-5B / %x5D-10FFFF
-
-3.  Encoding
-
-   JSON text SHALL be encoded in Unicode.  The default encoding is
-   UTF-8.
-
-   Since the first two characters of a JSON text will always be ASCII
-   characters [RFC0020], it is possible to determine whether an octet
-   stream is UTF-8, UTF-16 (BE or LE), or UTF-32 (BE or LE) by looking
-   at the pattern of nulls in the first four octets.
-
-           00 00 00 xx  UTF-32BE
-           00 xx 00 xx  UTF-16BE
-           xx 00 00 00  UTF-32LE
-           xx 00 xx 00  UTF-16LE
-           xx xx xx xx  UTF-8
-
-4.  Parsers
-
-   A JSON parser transforms a JSON text into another representation.  A
-   JSON parser MUST accept all texts that conform to the JSON grammar.
-   A JSON parser MAY accept non-JSON forms or extensions.
-
-   An implementation may set limits on the size of texts that it
-   accepts.  An implementation may set limits on the maximum depth of
-   nesting.  An implementation may set limits on the range of numbers.
-   An implementation may set limits on the length and character contents
-   of strings.
-
-5. Generators
-
-   A JSON generator produces JSON text.  The resulting text MUST
-   strictly conform to the JSON grammar.
-
-6. IANA Considerations
-
-   The MIME media type for JSON text is application/json.
-
-   Type name: application
-
-   Subtype name: json
-
-   Required parameters: n/a
-
-   Optional parameters: n/a
-
-   Encoding considerations: 8bit if UTF-8; binary if UTF-16 or UTF-32
-
-      JSON may be represented using UTF-8, UTF-16, or UTF-32.  When JSON
-      is written in UTF-8, JSON is 8bit compatible.  When JSON is
-      written in UTF-16 or UTF-32, the binary content-transfer-encoding
-      must be used.
-
-   Security considerations:
-
-   Generally there are security issues with scripting languages.  JSON
-   is a subset of JavaScript, but it is a safe subset that excludes
-   assignment and invocation.
-
-   A JSON text can be safely passed into JavaScript's eval() function
-   (which compiles and executes a string) if all the characters not
-   enclosed in strings are in the set of characters that form JSON
-   tokens.  This can be quickly determined in JavaScript with two
-   regular expressions and calls to the test and replace methods.
-
-      var my_JSON_object = !(/[^,:{}\[\]0-9.\-+Eaeflnr-u \n\r\t]/.test(
-             text.replace(/"(\\.|[^"\\])*"/g, ''))) &&
-         eval('(' + text + ')');
-
-   Interoperability considerations: n/a
-
-   Published specification: RFC 4627
-
-   Applications that use this media type:
-
-      JSON has been used to exchange data between applications written
-      in all of these programming languages: ActionScript, C, C#,
-      ColdFusion, Common Lisp, E, Erlang, Java, JavaScript, Lua,
-      Objective CAML, Perl, PHP, Python, Rebol, Ruby, and Scheme.
-
-   Additional information:
-
-      Magic number(s): n/a
-      File extension(s): .json
-      Macintosh file type code(s): TEXT
-
-   Person & email address to contact for further information:
-      Douglas Crockford
-      douglas@crockford.com
-
-   Intended usage: COMMON
-
-   Restrictions on usage: none
-
-   Author:
-      Douglas Crockford
-      douglas@crockford.com
-
-   Change controller:
-      Douglas Crockford
-      douglas@crockford.com
-
-7. Security Considerations
-
-   See Security Considerations in Section 6.
-
-8. Examples
-
-   This is a JSON object:
-
-   {
-      "Image": {
-          "Width":  800,
-          "Height": 600,
-          "Title":  "View from 15th Floor",
-          "Thumbnail": {
-              "Url":    "http://www.example.com/image/481989943",
-              "Height": 125,
-              "Width":  "100"
-          },
-          "IDs": [116, 943, 234, 38793]
-        }
-   }
-
-   Its Image member is an object whose Thumbnail member is an object
-   and whose IDs member is an array of numbers.
-
-   This is a JSON array containing two objects:
-
-   [
-      {
-         "precision": "zip",
-         "Latitude":  37.7668,
-         "Longitude": -122.3959,
-         "Address":   "",
-         "City":      "SAN FRANCISCO",
-         "State":     "CA",
-         "Zip":       "94107",
-         "Country":   "US"
-      },
-      {
-         "precision": "zip",
-         "Latitude":  37.371991,
-         "Longitude": -122.026020,
-         "Address":   "",
-         "City":      "SUNNYVALE",
-         "State":     "CA",
-         "Zip":       "94085",
-         "Country":   "US"
-      }
-   ]
-
-9. References
-
-9.1.  Normative References
-
-   [ECMA]    European Computer Manufacturers Association, "ECMAScript
-             Language Specification 3rd Edition", December 1999,
-             <http://www.ecma-international.org/publications/files/
-             ecma-st/ECMA-262.pdf>.
-
-   [RFC0020] Cerf, V., "ASCII format for network interchange", RFC 20,
-             October 1969.
-
-   [RFC2119] Bradner, S., "Key words for use in RFCs to Indicate
-             Requirement Levels", BCP 14, RFC 2119, March 1997.
-
-   [RFC4234] Crocker, D. and P.  Overell, "Augmented BNF for Syntax
-             Specifications: ABNF", RFC 4234, October 2005.
-
-   [UNICODE] The Unicode Consortium, "The Unicode Standard Version 4.0",
-             2003, <http://www.unicode.org/versions/Unicode4.1.0/>.
-
-Author's Address
-
-   Douglas Crockford
-   JSON.org
-   EMail: douglas@crockford.com
-
-Full Copyright Statement
-
-   Copyright (C) The Internet Society (2006).
-
-   This document is subject to the rights, licenses and restrictions
-   contained in BCP 78, and except as set forth therein, the authors
-   retain all their rights.
-
-   This document and the information contained herein are provided on an
-   "AS IS" basis and THE CONTRIBUTOR, THE ORGANIZATION HE/SHE REPRESENTS
-   OR IS SPONSORED BY (IF ANY), THE INTERNET SOCIETY AND THE INTERNET
-   ENGINEERING TASK FORCE DISCLAIM ALL WARRANTIES, EXPRESS OR IMPLIED,
-   INCLUDING BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE
-   INFORMATION HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED
-   WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
-
-Intellectual Property
-
-   The IETF takes no position regarding the validity or scope of any
-   Intellectual Property Rights or other rights that might be claimed to
-   pertain to the implementation or use of the technology described in
-   this document or the extent to which any license under such rights
-   might or might not be available; nor does it represent that it has
-   made any independent effort to identify any such rights.  Information
-   on the procedures with respect to rights in RFC documents can be
-   found in BCP 78 and BCP 79.
-
-   Copies of IPR disclosures made to the IETF Secretariat and any
-   assurances of licenses to be made available, or the result of an
-   attempt made to obtain a general license or permission for the use of
-   such proprietary rights by implementers or users of this
-   specification can be obtained from the IETF on-line IPR repository at
-   http://www.ietf.org/ipr.
-
-   The IETF invites any interested party to bring to its attention any
-   copyrights, patents or patent applications, or other proprietary
-   rights that may cover technology that may be required to implement
-   this standard.  Please address the information to the IETF at
-   ietf-ipr@ietf.org.
-
-Acknowledgement
-
-   Funding for the RFC Editor function is provided by the IETF
-   Administrative Support Activity (IASA).
-

BIN
src/contrib/chukwa/lib/json.jar


+ 0 - 286
src/contrib/chukwa/src/java/org/apache/hadoop/chukwa/ChukwaArchiveKey.java

@@ -1,286 +0,0 @@
-// File generated by hadoop record compiler. Do not edit.
-package org.apache.hadoop.chukwa;
-
-public class ChukwaArchiveKey extends org.apache.hadoop.record.Record {
-  private static final org.apache.hadoop.record.meta.RecordTypeInfo _rio_recTypeInfo;
-  private static org.apache.hadoop.record.meta.RecordTypeInfo _rio_rtiFilter;
-  private static int[] _rio_rtiFilterFields;
-  static {
-    _rio_recTypeInfo = new org.apache.hadoop.record.meta.RecordTypeInfo("ChukwaArchiveKey");
-    _rio_recTypeInfo.addField("timePartition", org.apache.hadoop.record.meta.TypeID.LongTypeID);
-    _rio_recTypeInfo.addField("dataType", org.apache.hadoop.record.meta.TypeID.StringTypeID);
-    _rio_recTypeInfo.addField("streamName", org.apache.hadoop.record.meta.TypeID.StringTypeID);
-    _rio_recTypeInfo.addField("seqId", org.apache.hadoop.record.meta.TypeID.LongTypeID);
-  }
-  
-  private long timePartition;
-  private String dataType;
-  private String streamName;
-  private long seqId;
-  public ChukwaArchiveKey() { }
-  public ChukwaArchiveKey(
-    final long timePartition,
-    final String dataType,
-    final String streamName,
-    final long seqId) {
-    this.timePartition = timePartition;
-    this.dataType = dataType;
-    this.streamName = streamName;
-    this.seqId = seqId;
-  }
-  public static org.apache.hadoop.record.meta.RecordTypeInfo getTypeInfo() {
-    return _rio_recTypeInfo;
-  }
-  public static void setTypeFilter(org.apache.hadoop.record.meta.RecordTypeInfo rti) {
-    if (null == rti) return;
-    _rio_rtiFilter = rti;
-    _rio_rtiFilterFields = null;
-  }
-  private static void setupRtiFields()
-  {
-    if (null == _rio_rtiFilter) return;
-    // we may already have done this
-    if (null != _rio_rtiFilterFields) return;
-    int _rio_i, _rio_j;
-    _rio_rtiFilterFields = new int [_rio_rtiFilter.getFieldTypeInfos().size()];
-    for (_rio_i=0; _rio_i<_rio_rtiFilterFields.length; _rio_i++) {
-      _rio_rtiFilterFields[_rio_i] = 0;
-    }
-    java.util.Iterator<org.apache.hadoop.record.meta.FieldTypeInfo> _rio_itFilter = _rio_rtiFilter.getFieldTypeInfos().iterator();
-    _rio_i=0;
-    while (_rio_itFilter.hasNext()) {
-      org.apache.hadoop.record.meta.FieldTypeInfo _rio_tInfoFilter = _rio_itFilter.next();
-      java.util.Iterator<org.apache.hadoop.record.meta.FieldTypeInfo> _rio_it = _rio_recTypeInfo.getFieldTypeInfos().iterator();
-      _rio_j=1;
-      while (_rio_it.hasNext()) {
-        org.apache.hadoop.record.meta.FieldTypeInfo _rio_tInfo = _rio_it.next();
-        if (_rio_tInfo.equals(_rio_tInfoFilter)) {
-          _rio_rtiFilterFields[_rio_i] = _rio_j;
-          break;
-        }
-        _rio_j++;
-      }
-      _rio_i++;
-    }
-  }
-  public long getTimePartition() {
-    return timePartition;
-  }
-  public void setTimePartition(final long timePartition) {
-    this.timePartition=timePartition;
-  }
-  public String getDataType() {
-    return dataType;
-  }
-  public void setDataType(final String dataType) {
-    this.dataType=dataType;
-  }
-  public String getStreamName() {
-    return streamName;
-  }
-  public void setStreamName(final String streamName) {
-    this.streamName=streamName;
-  }
-  public long getSeqId() {
-    return seqId;
-  }
-  public void setSeqId(final long seqId) {
-    this.seqId=seqId;
-  }
-  public void serialize(final org.apache.hadoop.record.RecordOutput _rio_a, final String _rio_tag)
-  throws java.io.IOException {
-    _rio_a.startRecord(this,_rio_tag);
-    _rio_a.writeLong(timePartition,"timePartition");
-    _rio_a.writeString(dataType,"dataType");
-    _rio_a.writeString(streamName,"streamName");
-    _rio_a.writeLong(seqId,"seqId");
-    _rio_a.endRecord(this,_rio_tag);
-  }
-  private void deserializeWithoutFilter(final org.apache.hadoop.record.RecordInput _rio_a, final String _rio_tag)
-  throws java.io.IOException {
-    _rio_a.startRecord(_rio_tag);
-    timePartition=_rio_a.readLong("timePartition");
-    dataType=_rio_a.readString("dataType");
-    streamName=_rio_a.readString("streamName");
-    seqId=_rio_a.readLong("seqId");
-    _rio_a.endRecord(_rio_tag);
-  }
-  public void deserialize(final org.apache.hadoop.record.RecordInput _rio_a, final String _rio_tag)
-  throws java.io.IOException {
-    if (null == _rio_rtiFilter) {
-      deserializeWithoutFilter(_rio_a, _rio_tag);
-      return;
-    }
-    // if we're here, we need to read based on version info
-    _rio_a.startRecord(_rio_tag);
-    setupRtiFields();
-    for (int _rio_i=0; _rio_i<_rio_rtiFilter.getFieldTypeInfos().size(); _rio_i++) {
-      if (1 == _rio_rtiFilterFields[_rio_i]) {
-        timePartition=_rio_a.readLong("timePartition");
-      }
-      else if (2 == _rio_rtiFilterFields[_rio_i]) {
-        dataType=_rio_a.readString("dataType");
-      }
-      else if (3 == _rio_rtiFilterFields[_rio_i]) {
-        streamName=_rio_a.readString("streamName");
-      }
-      else if (4 == _rio_rtiFilterFields[_rio_i]) {
-        seqId=_rio_a.readLong("seqId");
-      }
-      else {
-        java.util.ArrayList<org.apache.hadoop.record.meta.FieldTypeInfo> typeInfos = (java.util.ArrayList<org.apache.hadoop.record.meta.FieldTypeInfo>)(_rio_rtiFilter.getFieldTypeInfos());
-        org.apache.hadoop.record.meta.Utils.skip(_rio_a, typeInfos.get(_rio_i).getFieldID(), typeInfos.get(_rio_i).getTypeID());
-      }
-    }
-    _rio_a.endRecord(_rio_tag);
-  }
-  public int compareTo (final Object _rio_peer_) throws ClassCastException {
-    if (!(_rio_peer_ instanceof ChukwaArchiveKey)) {
-      throw new ClassCastException("Comparing different types of records.");
-    }
-    ChukwaArchiveKey _rio_peer = (ChukwaArchiveKey) _rio_peer_;
-    int _rio_ret = 0;
-    _rio_ret = (timePartition == _rio_peer.timePartition)? 0 :((timePartition<_rio_peer.timePartition)?-1:1);
-    if (_rio_ret != 0) return _rio_ret;
-    _rio_ret = dataType.compareTo(_rio_peer.dataType);
-    if (_rio_ret != 0) return _rio_ret;
-    _rio_ret = streamName.compareTo(_rio_peer.streamName);
-    if (_rio_ret != 0) return _rio_ret;
-    _rio_ret = (seqId == _rio_peer.seqId)? 0 :((seqId<_rio_peer.seqId)?-1:1);
-    if (_rio_ret != 0) return _rio_ret;
-    return _rio_ret;
-  }
-  public boolean equals(final Object _rio_peer_) {
-    if (!(_rio_peer_ instanceof ChukwaArchiveKey)) {
-      return false;
-    }
-    if (_rio_peer_ == this) {
-      return true;
-    }
-    ChukwaArchiveKey _rio_peer = (ChukwaArchiveKey) _rio_peer_;
-    boolean _rio_ret = false;
-    _rio_ret = (timePartition==_rio_peer.timePartition);
-    if (!_rio_ret) return _rio_ret;
-    _rio_ret = dataType.equals(_rio_peer.dataType);
-    if (!_rio_ret) return _rio_ret;
-    _rio_ret = streamName.equals(_rio_peer.streamName);
-    if (!_rio_ret) return _rio_ret;
-    _rio_ret = (seqId==_rio_peer.seqId);
-    if (!_rio_ret) return _rio_ret;
-    return _rio_ret;
-  }
-  public Object clone() throws CloneNotSupportedException {
-    ChukwaArchiveKey _rio_other = new ChukwaArchiveKey();
-    _rio_other.timePartition = this.timePartition;
-    _rio_other.dataType = this.dataType;
-    _rio_other.streamName = this.streamName;
-    _rio_other.seqId = this.seqId;
-    return _rio_other;
-  }
-  public int hashCode() {
-    int _rio_result = 17;
-    int _rio_ret;
-    _rio_ret = (int) (timePartition^(timePartition>>>32));
-    _rio_result = 37*_rio_result + _rio_ret;
-    _rio_ret = dataType.hashCode();
-    _rio_result = 37*_rio_result + _rio_ret;
-    _rio_ret = streamName.hashCode();
-    _rio_result = 37*_rio_result + _rio_ret;
-    _rio_ret = (int) (seqId^(seqId>>>32));
-    _rio_result = 37*_rio_result + _rio_ret;
-    return _rio_result;
-  }
-  public static String signature() {
-    return "LChukwaArchiveKey(lssl)";
-  }
-  public static class Comparator extends org.apache.hadoop.record.RecordComparator {
-    public Comparator() {
-      super(ChukwaArchiveKey.class);
-    }
-    static public int slurpRaw(byte[] b, int s, int l) {
-      try {
-        int os = s;
-        {
-          long i = org.apache.hadoop.record.Utils.readVLong(b, s);
-          int z = org.apache.hadoop.record.Utils.getVIntSize(i);
-          s+=z; l-=z;
-        }
-        {
-          int i = org.apache.hadoop.record.Utils.readVInt(b, s);
-          int z = org.apache.hadoop.record.Utils.getVIntSize(i);
-          s+=(z+i); l-= (z+i);
-        }
-        {
-          int i = org.apache.hadoop.record.Utils.readVInt(b, s);
-          int z = org.apache.hadoop.record.Utils.getVIntSize(i);
-          s+=(z+i); l-= (z+i);
-        }
-        {
-          long i = org.apache.hadoop.record.Utils.readVLong(b, s);
-          int z = org.apache.hadoop.record.Utils.getVIntSize(i);
-          s+=z; l-=z;
-        }
-        return (os - s);
-      } catch(java.io.IOException e) {
-        throw new RuntimeException(e);
-      }
-    }
-    static public int compareRaw(byte[] b1, int s1, int l1,
-                                   byte[] b2, int s2, int l2) {
-      try {
-        int os1 = s1;
-        {
-          long i1 = org.apache.hadoop.record.Utils.readVLong(b1, s1);
-          long i2 = org.apache.hadoop.record.Utils.readVLong(b2, s2);
-          if (i1 != i2) {
-            return ((i1-i2) < 0) ? -1 : 0;
-          }
-          int z1 = org.apache.hadoop.record.Utils.getVIntSize(i1);
-          int z2 = org.apache.hadoop.record.Utils.getVIntSize(i2);
-          s1+=z1; s2+=z2; l1-=z1; l2-=z2;
-        }
-        {
-          int i1 = org.apache.hadoop.record.Utils.readVInt(b1, s1);
-          int i2 = org.apache.hadoop.record.Utils.readVInt(b2, s2);
-          int z1 = org.apache.hadoop.record.Utils.getVIntSize(i1);
-          int z2 = org.apache.hadoop.record.Utils.getVIntSize(i2);
-          s1+=z1; s2+=z2; l1-=z1; l2-=z2;
-          int r1 = org.apache.hadoop.record.Utils.compareBytes(b1,s1,i1,b2,s2,i2);
-          if (r1 != 0) { return (r1<0)?-1:0; }
-          s1+=i1; s2+=i2; l1-=i1; l1-=i2;
-        }
-        {
-          int i1 = org.apache.hadoop.record.Utils.readVInt(b1, s1);
-          int i2 = org.apache.hadoop.record.Utils.readVInt(b2, s2);
-          int z1 = org.apache.hadoop.record.Utils.getVIntSize(i1);
-          int z2 = org.apache.hadoop.record.Utils.getVIntSize(i2);
-          s1+=z1; s2+=z2; l1-=z1; l2-=z2;
-          int r1 = org.apache.hadoop.record.Utils.compareBytes(b1,s1,i1,b2,s2,i2);
-          if (r1 != 0) { return (r1<0)?-1:0; }
-          s1+=i1; s2+=i2; l1-=i1; l1-=i2;
-        }
-        {
-          long i1 = org.apache.hadoop.record.Utils.readVLong(b1, s1);
-          long i2 = org.apache.hadoop.record.Utils.readVLong(b2, s2);
-          if (i1 != i2) {
-            return ((i1-i2) < 0) ? -1 : 0;
-          }
-          int z1 = org.apache.hadoop.record.Utils.getVIntSize(i1);
-          int z2 = org.apache.hadoop.record.Utils.getVIntSize(i2);
-          s1+=z1; s2+=z2; l1-=z1; l2-=z2;
-        }
-        return (os1 - s1);
-      } catch(java.io.IOException e) {
-        throw new RuntimeException(e);
-      }
-    }
-    public int compare(byte[] b1, int s1, int l1,
-                         byte[] b2, int s2, int l2) {
-      int ret = compareRaw(b1,s1,l1,b2,s2,l2);
-      return (ret == -1)? -1 : ((ret==0)? 1 : 0);}
-  }
-  
-  static {
-    org.apache.hadoop.record.RecordComparator.define(ChukwaArchiveKey.class, new Comparator());
-  }
-}
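
A minimal, editor-added usage sketch for the generated key type above; the data
type and stream name values are invented for illustration, and only the
constructor, getters and compareTo shown in the removed file are used.

      import org.apache.hadoop.chukwa.ChukwaArchiveKey;

      public class ArchiveKeyExample {
        public static void main(String[] args) {
          ChukwaArchiveKey first = new ChukwaArchiveKey(
              1236902400000L, "SysLog", "host01/var/log/messages", 0L);
          ChukwaArchiveKey second = new ChukwaArchiveKey(
              1236906000000L, "SysLog", "host01/var/log/messages", 1024L);

          // compareTo orders by timePartition, then dataType, streamName, seqId.
          System.out.println(first.compareTo(second) < 0);   // true
          System.out.println(first.getDataType());           // SysLog
        }
      }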

+ 0 - 108
src/contrib/chukwa/src/java/org/apache/hadoop/chukwa/Chunk.java

@@ -1,108 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.chukwa;
-import java.io.DataOutput;
-import java.io.IOException;
-
-import org.apache.hadoop.chukwa.datacollection.adaptor.*;
-
-/**
- * A chunk is a sequence of bytes at a particular logical offset in a stream,
- * and containing one or more "records".
- *  Chunks have various metadata, such as source, format,
- * and pointers to record boundaries within the chunk.
- * 
- */
-public interface Chunk {
-	
-//these conceptually are really network addresses
-	public String getSource();
-	public void setSource(String logSource);
-	
-	/**
-	 * Get the name of the stream that this Chunk is a chunk of
-	 * @return the name of this stream; e.g. file name
-	 */
-	public String getStreamName();
-	public void setStreamName(String streamName);
-	
-	public String getApplication();  
-  public void setApplication(String a);
-	
-  //These describe the format of the data buffer
-  public String getDataType();
-  public void setDataType(String t);
-
-  /**
-   * @return the user data in the chunk
-   */
-	public byte[] getData();
-	/**
-	 * @param logEvent the user data in the chunk
-	 */
-	public void setData(byte[] logEvent);
-	
-	/**
-	 * get/set the <b>end</b> offsets of records in the buffer.
-	 * 
-	 * We use end, rather than start offsets, since the first start
-	 * offset is always 0, but the last end offset specifies how much of the buffer is valid.
-	 * 
-	 * More precisely, offsets[i] is the offset in the Chunk of the last byte of record i
-	 *  in this chunk.
-	 * @return a list of record end offsets
-	 */
-	public int[] getRecordOffsets();
-	public void setRecordOffsets(int[] offsets);
-	
-	/**
-	 * @return  the byte offset of the first byte not in this chunk.
-	 * 
-	 * We pick this convention so that subtracting sequence IDs yields length.
-	 */
-	public long getSeqID();
-	public void setSeqID(long l);
-
-	/**
-	 * Retrieve a reference to the adaptor that sent this event.
-	 * Used by LocalAgent and Connectors to deliver acks to the appropriate place.
-	 */
-	public Adaptor getInitiator();
-	
-  /**
-   * Estimate the size of this Chunk on the wire, assuming each char of metadata takes two bytes
-   * to serialize.  This is pessimistic.
-   * @return size in bytes that this Chunk might take once serialized.
-   */
-  public int getSerializedSizeEstimate();
-  
-/**
- * @return name of cluster that this chunk comes from.
- * 
- */
-  public String getTags();  
-  
-  /**
-   * Set the name of the cluster that this chunk comes from.
-   * 
-   */
-    public void setTags(String tags);  
-  
-  public void write(DataOutput data) throws IOException;
-}
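
The end-offset convention documented in getRecordOffsets() is easy to get
wrong, so here is an editor-added, self-contained sketch (plain JDK only, no
Chukwa classes) of how offsets[i] = index of the last byte of record i is used
to slice a buffer back into records.

      import java.util.Arrays;

      public class RecordOffsetsExample {
        static byte[][] splitRecords(byte[] data, int[] endOffsets) {
          byte[][] records = new byte[endOffsets.length][];
          int start = 0;
          for (int i = 0; i < endOffsets.length; i++) {
            // record i spans (previous end + 1) .. endOffsets[i], inclusive
            records[i] = Arrays.copyOfRange(data, start, endOffsets[i] + 1);
            start = endOffsets[i] + 1;
          }
          return records;
        }

        public static void main(String[] args) {
          byte[] data = "line one\nline two\n".getBytes();
          int[] ends = { 8, 17 };                  // last byte of each record
          for (byte[] rec : splitRecords(data, ends))
            System.out.print(new String(rec));     // prints the two lines
        }
      }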

+ 0 - 63
src/contrib/chukwa/src/java/org/apache/hadoop/chukwa/ChunkBuilder.java

@@ -1,63 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.chukwa;
-
-import java.util.*;
-
-import org.apache.hadoop.io.DataOutputBuffer;
-import java.io.*;
-
-/**
- * Right now, just handles record collection.
- *
- */
-public class ChunkBuilder {
-  
-  ArrayList<Integer> recOffsets = new ArrayList<Integer>();
-  int lastRecOffset = -1;
-  DataOutputBuffer buf = new DataOutputBuffer();
-  /**
-   * Adds the data in rec to an internal buffer; rec can be reused immediately.
-   * @param rec
-   */
-  public void addRecord(byte[] rec)  {
-    lastRecOffset = lastRecOffset + rec.length;
-    recOffsets.add(lastRecOffset);
-    try {
-    buf.write(rec);
-    } catch(IOException e) {
-      throw new RuntimeException("buffer write failed.  Out of memory?", e);
-    }
-  }
-  
-  public Chunk getChunk() {
-    ChunkImpl c = new ChunkImpl();
-    c.setData(buf.getData());
-    c.setSeqID(buf.getLength());
-    int[] offsets = new int[recOffsets.size()];
-    for(int i = 0; i < offsets.length; ++i)
-      offsets[i] = recOffsets.get(i);
-    c.setRecordOffsets(offsets);
-    
-    return c;
-  }
-  
-  
-
-}
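
An editor-added sketch of how ChunkBuilder is meant to be driven; it assumes
ChukwaAgent.getTags() can be called outside a running agent (the ChunkImpl
constructor invokes it), and the record contents are invented.

      import org.apache.hadoop.chukwa.Chunk;
      import org.apache.hadoop.chukwa.ChunkBuilder;

      public class ChunkBuilderExample {
        public static void main(String[] args) {
          ChunkBuilder cb = new ChunkBuilder();
          cb.addRecord("first record\n".getBytes());   // each call appends one record
          cb.addRecord("second record\n".getBytes());  // and remembers its end offset

          Chunk c = cb.getChunk();
          System.out.println(c.getRecordOffsets().length);   // 2
          // Source and data type still have to be filled in by the caller before
          // the chunk is handed to a connector.
        }
      }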

+ 0 - 266
src/contrib/chukwa/src/java/org/apache/hadoop/chukwa/ChunkImpl.java

@@ -1,266 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.chukwa;
-
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-import java.net.InetAddress;
-import java.net.UnknownHostException;
-
-import org.apache.hadoop.chukwa.datacollection.adaptor.Adaptor;
-import org.apache.hadoop.chukwa.datacollection.agent.ChukwaAgent;
-
-public class ChunkImpl implements org.apache.hadoop.io.Writable, Chunk 
-{
-  public static int PROTOCOL_VERSION=1;
-  
-  private String source = "";
-  private String application = "";
-  private String dataType = "";
-  private String tags = "";
-  private byte[] data = null;
-  private int[] recordEndOffsets;
-  private int protocolVersion=1;
-  private String debuggingInfo="";
-  
-  private transient Adaptor initiator;
-  long seqID;
-  
-  ChunkImpl() {
-    this.tags = ChukwaAgent.getTags();
-  }
-  
-  public static ChunkImpl getBlankChunk() {
-    return new ChunkImpl();
-  }
-  
-  public ChunkImpl(String dataType, String streamName, long seq, byte[] data, Adaptor source) {
-    this.seqID = seq;
-    this.source = localHostAddr;
-    this.tags = ChukwaAgent.getTags();
-    this.application = streamName;
-    this.dataType = dataType;
-    this.data = data;
-    this.initiator = source;
-  }
-  
-  /**
-   *  @see org.apache.hadoop.chukwa.Chunk#getData()
-   */
-  public byte[] getData()	{
-  	return data;
-  }
-  
-  /**
-   *  @see org.apache.hadoop.chukwa.Chunk#setData(byte[])
-   */
-  public void setData(byte[] logEvent) {
-  	this.data = logEvent;
-  }
-  
-  /**
-   * @see org.apache.hadoop.chukwa.Chunk#getStreamName()
-   */
-  public String getStreamName() {
-  	return application;
-  }
-  
-  public void setStreamName(String logApplication)	{
-  	this.application = logApplication;
-  }
-   
-  public String getSource() {
-    return source;
-  }
-  
-  public void setSource(String logSource)	{
-  	this.source = logSource;
-  }
-  
-  public String getDebugInfo() {
-  	return debuggingInfo;
-  }
-  
-  public void setDebugInfo(String a) {
-  	this.debuggingInfo = a;
-  }
-  
-  /**
-   * @see org.apache.hadoop.chukwa.Chunk#getSeqID()
-   */
-  public long getSeqID()  {
-    return seqID;
-  }
-  
-  public void setSeqID(long l) {
-    seqID=l;
-  }
-  
-  public int getProtocolVersion() {
-	  return protocolVersion;
-  }
-  
-  public void setProtocolVersion(int pv) {
-	  this.protocolVersion = pv;
-  }
-  public String getApplication(){
-    return application;
-  }
-  
-  public void setApplication(String a){
-    application = a;
-  }
-  
-  public Adaptor getInitiator() {
-    return initiator;
-  }
-  
-  public void setInitiator(Adaptor a) {
-    initiator = a;
-  }
-  
-  
-  public void setLogSource() {
-    source = localHostAddr;
-  }
-  
-  public int[] getRecordOffsets() {
-
-    if(recordEndOffsets == null)
-      recordEndOffsets = new int[] {data.length -1};
-    return recordEndOffsets;
-  }
-  
-  public void setRecordOffsets(int[] offsets) {
-    recordEndOffsets = offsets;
-  }
-  
-  public String getDataType() {
-    return dataType;
-  }
-  
-  public void setDataType(String t) {
-    dataType = t;
-  }
-  
-  @Override
-  public void setTags(String tags)
-  {
-  	this.tags = tags;
-  }
-  
-/**
- * @see org.apache.hadoop.chukwa.Chunk#getTags()
- */
-  public String getTags() {
-    return tags;
-  }
-  
-  /**
-   * @see org.apache.hadoop.io.Writable#readFields(java.io.DataInput)
-   */
-  public void readFields(DataInput in) throws IOException {
-	setProtocolVersion(in.readInt());
-	if(protocolVersion!=PROTOCOL_VERSION) {
-		throw new IOException("Protocol version mismatched, drop data.  source version: "+protocolVersion+", collector version:"+PROTOCOL_VERSION);
-	}
-    setSeqID(in.readLong());
-    setSource(in.readUTF());
-    tags =  in.readUTF();    //no public set method here
-    setApplication(in.readUTF());
-    setDataType(in.readUTF());
-    setDebugInfo(in.readUTF());
-    
-    int numRecords = in.readInt();
-    recordEndOffsets = new int[numRecords];
-    for(int i=0; i < numRecords; ++i)
-      recordEndOffsets[i] = in.readInt();
-    data = new byte[recordEndOffsets[recordEndOffsets.length -1]+1 ] ;
-    in.readFully(data);
-    
-  }
-
-  /**
-   * @see org.apache.hadoop.io.Writable#write(java.io.DataOutput)
-   */
-  public void write(DataOutput out) throws IOException {
-	out.writeInt(PROTOCOL_VERSION);
-    out.writeLong(seqID);
-    out.writeUTF(source);
-    out.writeUTF(tags);
-    out.writeUTF(application);
-    out.writeUTF(dataType);
-    out.writeUTF(debuggingInfo);
-    
-    if(recordEndOffsets == null)
-      recordEndOffsets = new int[] {data.length -1};
-      
-    out.writeInt(recordEndOffsets.length);
-    for(int i =0; i < recordEndOffsets.length; ++i)
-      out.writeInt(recordEndOffsets[i]);
-    
-    out.write(data, 0, recordEndOffsets[recordEndOffsets.length -1] + 1); //byte at last offset is valid
-  }
-  
-  public static ChunkImpl read(DataInput in) throws IOException {
-    ChunkImpl w = new ChunkImpl();
-    w.readFields(in);
-    return w;
-  }
-  
-    //FIXME: should do something better here, but this is OK for debugging
-  public String toString() {
-    return source+":" + application +":"+ new String(data)+ "/"+seqID;
-  }
-  
-  private static String localHostAddr;
-  static
-  {
-    try {
-      localHostAddr = InetAddress.getLocalHost().getHostName();
-    } catch (UnknownHostException e) {
-      localHostAddr = "localhost";
-    }
-  }
-  
-  /**
-   * @see org.apache.hadoop.chukwa.Chunk#getSerializedSizeEstimate()
-   */
-  public int getSerializedSizeEstimate() {
-    int size= 2 * (source.length() + application.length() + 
-        dataType.length() + debuggingInfo.length()); //length of strings (pessimistic)
-    size += data.length + 4;
-    if(recordEndOffsets == null)
-      size+=8;
-    else
-      size += 4 * (recordEndOffsets.length + 1); //+1 for length of array
-    size += 8; //uuid
-    return size;
-  }
-
-  public void setRecordOffsets(java.util.Collection<Integer> carriageReturns)
-  {
-    recordEndOffsets = new int [carriageReturns.size()];
-    int i = 0;
-    for(Integer offset:carriageReturns )
-      recordEndOffsets[i++] = offset;
-  }
-	
-}
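
An editor-added round-trip sketch of the Writable serialization implemented
above; the data type and stream name are invented, the adaptor is left null,
and it assumes ChukwaAgent.getTags() returns a usable tag string outside a
running agent.

      import java.io.*;
      import org.apache.hadoop.chukwa.ChunkImpl;

      public class ChunkRoundTrip {
        public static void main(String[] args) throws IOException {
          byte[] payload = "2009-03-24 10:00:00 INFO example line\n".getBytes();
          ChunkImpl out = new ChunkImpl("SysLog", "/var/log/messages",
                                        payload.length, payload, null);

          ByteArrayOutputStream buf = new ByteArrayOutputStream();
          out.write(new DataOutputStream(buf));              // Writable.write()

          ChunkImpl in = ChunkImpl.read(
              new DataInputStream(new ByteArrayInputStream(buf.toByteArray())));
          System.out.println(in.getSeqID());                 // payload length
          System.out.print(new String(in.getData()));
        }
      }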

+ 0 - 64
src/contrib/chukwa/src/java/org/apache/hadoop/chukwa/conf/ChukwaConfiguration.java

@@ -1,64 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.chukwa.conf;
-
-import java.io.File;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.Path;
-import org.apache.log4j.Logger;
-
-public class ChukwaConfiguration extends Configuration {
-	static Logger log = Logger.getLogger(ChukwaConfiguration.class);
-
-	public ChukwaConfiguration() {
-		this(true);
-	}
-
-	public ChukwaConfiguration(boolean loadDefaults) {
-		super();
-		if (loadDefaults) {
-
-			String chukwaHome = System.getenv("CHUKWA_HOME");
-			if (chukwaHome == null){
-				chukwaHome = ".";
-			}
-
-			if(!chukwaHome.endsWith("/"))
-			{  chukwaHome = chukwaHome + File.separator; }	
-			String chukwaConf = System.getenv("CHUKWA_CONF_DIR");
-			if (chukwaConf == null)
-			{  chukwaConf = chukwaHome + "conf" + File.separator; }
-
-			log.info("chukwaConf is " + chukwaConf);
-
-		  super.addResource(new Path(chukwaConf + "/chukwa-collector-conf.xml"));
-		  log.debug("added chukwa-collector-conf.xml to ChukwaConfiguration");
-
-		  super.addResource(new Path(chukwaConf + "/chukwa-agent-conf.xml"));
-		  log.debug("added chukwa-agent-conf.xml to ChukwaConfiguration");
-
-		  super.addResource(new Path(chukwaConf + "/hadoop-site.xml"));
-		  log.debug("added hadoop-site.xml to ChukwaConfiguration");
-
-			
-		}
-	}
-
-}
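
A short, editor-added example of loading this configuration; it relies on
CHUKWA_HOME / CHUKWA_CONF_DIR pointing at a directory containing the three XML
files listed above, and the property name queried here is only illustrative.

      import org.apache.hadoop.chukwa.conf.ChukwaConfiguration;

      public class ConfExample {
        public static void main(String[] args) {
          ChukwaConfiguration conf = new ChukwaConfiguration();
          // Illustrative key; fall back to a default if it is not defined.
          System.out.println(conf.get("chukwaAgent.control.port", "9093"));
        }
      }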

+ 0 - 277
src/contrib/chukwa/src/java/org/apache/hadoop/chukwa/database/Aggregator.java

@@ -1,277 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.chukwa.database;
-
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileReader;
-import java.io.IOException;
-import java.sql.ResultSet;
-import java.sql.ResultSetMetaData;
-import java.sql.SQLException;
-import java.text.SimpleDateFormat;
-import java.util.ArrayList;
-import java.util.Calendar;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-import java.sql.DatabaseMetaData;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.chukwa.inputtools.mdl.DataConfig;
-import org.apache.hadoop.chukwa.util.DatabaseWriter;
-import org.apache.hadoop.chukwa.util.ExceptionUtil;
-import org.apache.hadoop.chukwa.util.PidFile;
-
-public class Aggregator {
-	private static DatabaseConfig dbc = null;
-
-	private static Log log = LogFactory.getLog(Aggregator.class);
-	private String table = null;
-	private String jdbc = null;
-	private int[] intervals;
-	private long current = 0;
-    private static DatabaseWriter db = null;
-    public Aggregator() {
-		dbc = new DatabaseConfig();
-		Calendar now = Calendar.getInstance();
-		current = now.getTimeInMillis();
-	}
-
-	public HashMap<String,String> findMacros(String query) throws SQLException {
-		boolean add=false;
-		HashMap<String,String> macroList = new HashMap<String,String>();
-		String macro="";
-	    for(int i=0;i<query.length();i++) {
-	    	if(query.charAt(i)==']') {
-	    		add=false;
-	    		if(!macroList.containsKey(macro)) {
-		    		String subString = computeMacro(macro);
-		    		macroList.put(macro,subString);	    			
-	    		}
-	    		macro="";
-	    	}
-	    	if(add) {
-	    		macro=macro+query.charAt(i);
-	    	}
-	    	if(query.charAt(i)=='[') {
-	    		add=true;
-	    	}
-	    }
-	    return macroList;
-	}
-
-	public String computeMacro(String macro) throws SQLException {
-		Pattern p = Pattern.compile("past_(.*)_minutes");
-		Matcher matcher = p.matcher(macro);
-		if(macro.indexOf("avg(")==0 || macro.indexOf("group_avg(")==0) {
-			String meta="";
-			String[] table = dbc.findTableName(macro.substring(macro.indexOf("(")+1,macro.indexOf(")")), current, current);
-			try {
-				String cluster = System.getProperty("CLUSTER");
-				if(cluster==null) {
-					cluster="unknown";
-				}
-                DatabaseMetaData dbMetaData = db.getConnection().getMetaData();
-	            ResultSet rs = dbMetaData.getColumns ( null,null,table[0], null);
-	            boolean first=true;
-	            while(rs.next()) {
-	            	if(!first) {
-	            		meta = meta+",";
-	            	}
-	            	String name = rs.getString(4);
-	            	int type = rs.getInt(5);
-	            	if(type==java.sql.Types.VARCHAR) {
-	            		if(macro.indexOf("group_avg(")<0) {
-	            			meta=meta+"count("+name+") as "+name;
-	            		} else {
-	            			meta=meta+name;
-	            		}
-		            	first=false;
-	            	} else if(type==java.sql.Types.DOUBLE ||
-	            			  type==java.sql.Types.FLOAT ||
-	            			  type==java.sql.Types.INTEGER) {
-	            		meta=meta+"avg("+name+")";
-		            	first=false;
-	            	} else if(type==java.sql.Types.TIMESTAMP) {
-	            		// Skip the column
-	            	} else {
-	            		meta=meta+"AVG("+name+")";
-		            	first=false;
-	            	}
-	            }
-	            if(first) {
-	          	    throw new SQLException("Table is undefined.");
-	            }
-			} catch(SQLException ex) {
-				throw new SQLException("Table does not exist:"+ table[0]);
-			}
-			return meta;
-		} else if(macro.indexOf("now")==0) {
-			SimpleDateFormat sdf = new SimpleDateFormat();
-			return DatabaseWriter.formatTimeStamp(current);
-		} else if(matcher.find()) {
-			int period = Integer.parseInt(matcher.group(1));
-			long timestamp = current - (current % (period*60*1000L)) - (period*60*1000L);
-			return DatabaseWriter.formatTimeStamp(timestamp);
-		} else if(macro.indexOf("past_hour")==0) {
-			return DatabaseWriter.formatTimeStamp(current-3600*1000L);
-		} else if(macro.endsWith("_week")) {
-			long partition = current / DatabaseConfig.WEEK;
-			if(partition<=0) {
-				partition=1;
-			}
-			String[] buffers = macro.split("_");
-			StringBuffer tableName = new StringBuffer();
-			for(int i=0;i<buffers.length-1;i++) {
-				tableName.append(buffers[i]);
-				tableName.append("_");
-			}
-			tableName.append(partition);
-			tableName.append("_week");
-			return tableName.toString();
-		} else if(macro.endsWith("_month")) {
-			long partition = current / DatabaseConfig.MONTH;
-			if(partition<=0) {
-				partition=1;
-			}
-			String[] buffers = macro.split("_");
-			StringBuffer tableName = new StringBuffer();
-			for(int i=0;i<buffers.length-1;i++) {
-				tableName.append(buffers[i]);
-				tableName.append("_");
-			}
-			tableName.append(partition);
-			tableName.append("_month");
-			return tableName.toString();
-		} else if(macro.endsWith("_quarter")) {
-			long partition = current / DatabaseConfig.QUARTER;
-			if(partition<=0) {
-				partition=1;
-			}
-			String[] buffers = macro.split("_");
-			StringBuffer tableName = new StringBuffer();
-			for(int i=0;i<buffers.length-1;i++) {
-				tableName.append(buffers[i]);
-				tableName.append("_");
-			}
-			tableName.append(partition);
-			tableName.append("_quarter");
-			return tableName.toString();
-		} else if(macro.endsWith("_year")) {
-			long partition = current / DatabaseConfig.YEAR;
-			if(partition<=0) {
-				partition=1;
-			}
-			String[] buffers = macro.split("_");
-			StringBuffer tableName = new StringBuffer();
-			for(int i=0;i<buffers.length-1;i++) {
-				tableName.append(buffers[i]);
-				tableName.append("_");
-			}
-			tableName.append(partition);
-			tableName.append("_year");
-			return tableName.toString();
-		} else if(macro.endsWith("_decade")) {
-			long partition = current / DatabaseConfig.DECADE;
-			if(partition<=0) {
-				partition=1;
-			}
-			String[] buffers = macro.split("_");
-			StringBuffer tableName = new StringBuffer();
-			for(int i=0;i<buffers.length-1;i++) {
-				tableName.append(buffers[i]);
-				tableName.append("_");
-			}
-			tableName.append(partition);
-			tableName.append("_decade");
-			return tableName.toString();
-		}
-		String[] tableList = dbc.findTableName(macro,current,current);
-		return tableList[0];
-	}
-
-	public static String getContents(File aFile) {
-        StringBuffer contents = new StringBuffer();    
-        try {
-        	BufferedReader input =  new BufferedReader(new FileReader(aFile));
-        	try {
-        		String line = null; //not declared within while loop
-        		while (( line = input.readLine()) != null){
-        			contents.append(line);
-        			contents.append(System.getProperty("line.separator"));
-        		}
-        	} finally {
-        		input.close();
-        	}
-        } catch (IOException ex){
-        	ex.printStackTrace();
-        }    
-        return contents.toString();
-    }
-
-	public void process(String query) {
-		ResultSet rs = null;
-		String[] columns;
-		int[] columnsType;
-        String groupBy = "";
-	    long start = current;
-	    long end = current;
-        
-
-		try {
-            HashMap<String, String> macroList = findMacros(query);
-            Iterator<String> macroKeys = macroList.keySet().iterator();
-            while(macroKeys.hasNext()) {
-        	    String mkey = macroKeys.next();
-        	    log.debug("replacing:"+mkey+" with "+macroList.get(mkey));
-	    	    query = query.replace("["+mkey+"]", macroList.get(mkey));
-            }
-            db.execute(query);
-		} catch(SQLException e) {
-		    log.error(query);
-			log.error(e.getMessage());
-		}
-	}
-
-    public static void main(String[] args) {
-        log.info("Aggregator started.");
-    	dbc = new DatabaseConfig();
-		String cluster = System.getProperty("CLUSTER");
-		if(cluster==null) {
-			cluster="unknown";
-		}
-    	db = new DatabaseWriter(cluster);
-    	String queries = Aggregator.getContents(new File(System.getenv("CHUKWA_CONF_DIR")+File.separator+"aggregator.sql"));
-    	String[] query = queries.split("\n");
-    	for(int i=0;i<query.length;i++) {
-    		    if(query[i].equals("")) {
-    		    } else if(query[i].indexOf("#")==0) {
-    		    	log.debug("skipping: "+query[i]);
-    		    } else {
-    		    	Aggregator dba = new Aggregator();
-    		    	dba.process(query[i]);
-    		    }
-        }
-        db.close();
-    	log.info("Aggregator finished.");
-    }
-
-}
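
The bracket-macro substitution driven by findMacros()/process() is the core of
this class, so here is a simplified, editor-added stand-in for it; the query,
macro names and expansions below are all made up, and no database connection is
involved.

      import java.util.HashMap;
      import java.util.Map;

      public class MacroSketch {
        // Replace every [macro] token with a precomputed expansion, the way
        // process() rewrites a line of aggregator.sql before executing it.
        static String expand(String query, Map<String, String> macros) {
          for (Map.Entry<String, String> e : macros.entrySet())
            query = query.replace("[" + e.getKey() + "]", e.getValue());
          return query;
        }

        public static void main(String[] args) {
          Map<String, String> macros = new HashMap<String, String>();
          macros.put("now", "2009-03-24 10:00:00");
          macros.put("past_5_minutes", "2009-03-24 09:55:00");
          macros.put("sample_table_month", "sample_table_477_month");
          System.out.println(expand(
              "replace into [sample_table_month] select * from sample_table "
              + "where Timestamp between '[past_5_minutes]' and '[now]'", macros));
        }
      }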

+ 0 - 256
src/contrib/chukwa/src/java/org/apache/hadoop/chukwa/database/Consolidator.java

@@ -1,256 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.chukwa.database;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.chukwa.inputtools.mdl.DataConfig;
-import org.apache.hadoop.chukwa.util.DatabaseWriter;
-import org.apache.hadoop.chukwa.util.ExceptionUtil;
-import org.apache.hadoop.chukwa.util.PidFile;
-
-import java.sql.SQLException;
-import java.sql.ResultSet;
-import java.util.Calendar;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.sql.ResultSetMetaData;
-import java.text.SimpleDateFormat;
-
-public class Consolidator extends Thread {
-	private DatabaseConfig dbc = new DatabaseConfig();
-
-	private static Log log = LogFactory.getLog(Consolidator.class);
-	private String table = null;
-	private int[] intervals;
-    private static PidFile loader=null;
-
-	public Consolidator(String table, String intervalString) {
-		super(table);
-		try {
-			int i=0;
-			String[] temp = intervalString.split("\\,");
-			intervals = new int[temp.length];
-			for(String s: temp) {
-			    intervals[i]=Integer.parseInt(s);
-			    i++;
-			}
-			this.table = table;
-		} catch (NumberFormatException ex) {
-			log.error("Unable to parse summary interval");
-		}		
-	}
-	public void run() {
-		ResultSet rs = null;
-		String[] columns;
-		int[] columnsType;
-		String groupBy = "";
-        
-		for(int interval : intervals) {
-			// Start reducing from beginning of time;
-			Calendar aYearAgo = Calendar.getInstance();
-			aYearAgo.set(2008, 1, 1, 0, 0, 0);
-
-			long start = aYearAgo.getTimeInMillis();  //starting from 2008/01/01
-			long end = start + (interval*60000);
-			log.debug("start time: "+start);
-			log.debug("end time: "+end);
-			Calendar now = Calendar.getInstance();
-			String cluster = System.getProperty("CLUSTER");
-			if(cluster==null) {
-				cluster="unknown";
-			}
-			DatabaseWriter db = new DatabaseWriter(cluster);
-			String fields = null;
-			String dateclause = null;
-			boolean emptyPrimeKey = false;
-			log.info("Consolidate for "+interval+" minutes interval.");
-			
-			String[] tmpTable = dbc.findTableName(this.table, start, end);
-			String table = tmpTable[0];
-			String sumTable="";
-			if(interval==5) {
-				long partition=now.getTime().getTime() / DatabaseConfig.WEEK;
-				StringBuilder stringBuilder = new StringBuilder();
-				stringBuilder.append(this.table);
-				stringBuilder.append("_");
-				stringBuilder.append(partition);
-				stringBuilder.append("_week");
-				table=stringBuilder.toString();
-				long partition2=now.getTime().getTime() / DatabaseConfig.MONTH;
-				sumTable =this.table+"_"+partition2+"_month";
-			} else if(interval==30) {
-				long partition=now.getTime().getTime() / DatabaseConfig.MONTH;
-				table=this.table+"_"+partition+"_month";				
-				long partition2=now.getTime().getTime() / DatabaseConfig.QUARTER;
-				sumTable =this.table+"_"+partition2+"_month";
-			} else if(interval==180) {
-				long partition=now.getTime().getTime() / DatabaseConfig.QUARTER;
-				table=this.table+"_"+partition+"_quarter";
-				long partition2=now.getTime().getTime() / DatabaseConfig.YEAR;
-				sumTable =this.table+"_"+partition2+"_month";
-			} else if(interval==720) {
-				long partition=now.getTime().getTime() / DatabaseConfig.YEAR;
-				table=this.table+"_"+partition+"_year";
-				long partition2=now.getTime().getTime() / DatabaseConfig.DECADE;
-				sumTable =this.table+"_"+partition2+"_month";
-			}
-			// Find the most recent entry
-			try {
-			    String query = "select * from "+sumTable+" order by timestamp desc limit 1";
-	            log.debug("Query: "+query);
-	            rs = db.query(query);
-	            if(rs==null) {
-	          	    throw new SQLException("Table is undefined.");
-	            }
-	            ResultSetMetaData rmeta = rs.getMetaData();
-	            boolean empty=true;
-	            if(rs.next()) {
-	                for(int i=1;i<=rmeta.getColumnCount();i++) {
-		                if(rmeta.getColumnName(i).toLowerCase().equals("timestamp")) {
-		            	    start = rs.getTimestamp(i).getTime();
-		                }
-	                }
-	                empty=false;
-	            }
-	            if(empty) {
-	              	throw new SQLException("Table is empty.");
-	            }
-                end = start + (interval*60000);
-			} catch (SQLException ex) {
-			    try {
-				    String query = "select * from "+table+" order by timestamp limit 1";
-		            log.debug("Query: "+query);
-	                rs = db.query(query);
-	                if(rs.next()) {
-	    	            ResultSetMetaData rmeta = rs.getMetaData();
-	    	            for(int i=1;i<=rmeta.getColumnCount();i++) {
-	    	                if(rmeta.getColumnName(i).toLowerCase().equals("timestamp")) {
-	    	                	start = rs.getTimestamp(i).getTime();
-	    	                }
-	    	            }
-				    }
-                    end = start + (interval*60000);
-				} catch(SQLException ex2) {
-				    log.error("Unable to determine starting point in table: "+this.table);
-					log.error("SQL Error:"+ExceptionUtil.getStackTrace(ex2));
-					return;
-				}
-			}
-			try {
-                ResultSetMetaData rmeta = rs.getMetaData();
-                int col = rmeta.getColumnCount();
-                columns = new String[col];
-                columnsType = new int[col];
-                for(int i=1;i<=col;i++) {
-            	    columns[i-1]=rmeta.getColumnName(i);
-              	    columnsType[i-1]=rmeta.getColumnType(i);
-                }
-
-		        for(int i=0;i<columns.length;i++) {
-		    	    if(i==0) {
-		    		    fields=columns[i];
-	    	            if(columnsType[i]==java.sql.Types.VARCHAR) {
-	    	            	if(groupBy.equals("")) {
-	    	            	    groupBy = " group by "+columns[i];
-	    	            	} else {
-		    	            	groupBy = groupBy+","+columns[i];	    	            		
-	    	            	}
-	    	            }
-		    	    } else {
-		    		    if(columnsType[i]==java.sql.Types.VARCHAR || columnsType[i]==java.sql.Types.TIMESTAMP) {
-		    	            fields=fields+","+columns[i];
-		    	            if(columnsType[i]==java.sql.Types.VARCHAR) {
-		    	            	if(groupBy.equals("")) {
-		    	            	    groupBy = " group by "+columns[i];
-		    	            	} else {
-		    	            	    groupBy = groupBy+","+columns[i];		    	            		
-		    	            	}
-		    	            }
-		    		    } else {
-		    	            fields=fields+",AVG("+columns[i]+") as "+columns[i];
-		    		    }
-		    	    }
-		        }
-			} catch(SQLException ex) {
-			  	log.error("SQL Error:"+ExceptionUtil.getStackTrace(ex));
-			  	return;
-			}
-            if(groupBy.equals("")) {
-            	emptyPrimeKey = true;
-            }
-			long previousStart = start;
-			long partition = 0;
-			String timeWindowType="week";
-        	while(end < now.getTimeInMillis()-(interval*2*60000)) {
-			    // Select new data sample for the given intervals
-			    if(interval == 5) {
-			    	timeWindowType="month";
-					partition = start / DatabaseConfig.MONTH;
-			    } else if(interval == 30) {
-			    	timeWindowType="quarter";
-					partition = start / DatabaseConfig.QUARTER;
-			    } else if(interval == 180) {
-			    	timeWindowType="year";
-					partition = start / DatabaseConfig.YEAR;
-			    } else if(interval == 720) {
-			    	timeWindowType="decade";
-					partition = start / DatabaseConfig.DECADE;
-			    }
-	            SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
-			    String startS = formatter.format(start);
-			    String endS = formatter.format(end);
-			    dateclause = "Timestamp >= '"+startS+"' and Timestamp <= '"+endS+"'";
-			    if(emptyPrimeKey) {
-			    	groupBy = " group by FLOOR(UNIX_TIMESTAMP(TimeStamp)/"+interval*60+")";
-			    }
-				String query = "replace into "+this.table+"_"+partition+"_"+timeWindowType+" (select "+fields+" from "+table+" where "+dateclause+groupBy+")";
-				log.debug(query);
-                db.execute(query);
-        		if(previousStart == start) {
-        			start = start + (interval*60000);
-        			end = start + (interval*60000);
-            		previousStart = start;
-        		}
-        	}
-            db.close();
-		}
-	}
-
-    public static void main(String[] args) {
-        DataConfig mdl = new DataConfig();
-        loader=new PidFile(System.getProperty("CLUSTER")+"Consolidator");
-        HashMap<String, String> tableNames = (HashMap<String, String>) mdl.startWith("consolidator.table.");
-        try {
-                Iterator<String> ti = (tableNames.keySet()).iterator();
-                while(ti.hasNext()) {
-                        String table = ti.next();
-                String interval=mdl.get(table);
-                table = table.substring(19);
-                        log.info("Summarizing table:"+table);
-                Consolidator dbc = new Consolidator(table, interval);
-                dbc.run();
-                }
-        } catch (NullPointerException e) {
-                log.error("Unable to summarize database.");
-                log.error("Error:"+ExceptionUtil.getStackTrace(e));
-        }
-        loader.clean();
-    }
-}
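
An editor-added sketch of the naming scheme this class relies on: each pass
reads one time-partitioned table and writes the next coarser one, with the
partition number taken from the current time divided by the window width. The
constants mirror DatabaseConfig; the base table name and timestamp are made up.

      public class PartitionNames {
        static final long WEEK  = 7L  * 24 * 60 * 60 * 1000;   // as in DatabaseConfig
        static final long MONTH = 30L * 24 * 60 * 60 * 1000;

        static String weekTable(String base, long nowMillis) {
          return base + "_" + (nowMillis / WEEK) + "_week";
        }
        static String monthTable(String base, long nowMillis) {
          return base + "_" + (nowMillis / MONTH) + "_month";
        }

        public static void main(String[] args) {
          long now = 1237900000000L;               // late March 2009
          // The 5-minute pass reads the week table and writes the month table.
          System.out.println(weekTable("sample_metrics", now));   // sample_metrics_2046_week
          System.out.println(monthTable("sample_metrics", now));  // sample_metrics_477_month
        }
      }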

+ 0 - 106
src/contrib/chukwa/src/java/org/apache/hadoop/chukwa/database/DataExpiration.java

@@ -1,106 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.chukwa.database;
-
-import java.text.SimpleDateFormat;
-import java.util.Date;
-import java.util.HashMap;
-import java.util.Iterator;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.chukwa.util.DatabaseWriter;
-
-public class DataExpiration {
-	private static DatabaseConfig dbc = null;
-	private static Log log = LogFactory.getLog(DataExpiration.class);		
-	public DataExpiration() {
-    	if(dbc==null) {
-    	    dbc = new DatabaseConfig();
-    	}
-    }
-	public void dropTables(long start, long end) {
-		String cluster = System.getProperty("CLUSTER");
-		if(cluster==null) {
-			cluster="unknown";
-		}
-		DatabaseWriter dbw = new DatabaseWriter(cluster);
-		try {
-			HashMap<String, String> dbNames = dbc.startWith("report.db.name.");
-			Iterator<String> ki = dbNames.keySet().iterator();
-			while(ki.hasNext()) {
-				String name = ki.next();
-				String tableName = dbNames.get(name);
-				String[] tableList = dbc.findTableName(tableName, start, end);
-				for(String tl : tableList) {
-					log.debug("table name: "+tableList[0]);
-					try {
-						String[] parts = tl.split("_");
-						int partition = Integer.parseInt(parts[parts.length-2]);
-						String table = "";
-						for(int i=0;i<parts.length-2;i++) {
-							if(i!=0) {
-								table=table+"_";
-							}
-							table=table+parts[i];
-						}
-						partition=partition-3;
-						String dropPartition="drop table if exists "+table+"_"+partition+"_"+parts[parts.length-1];
-						dbw.execute(dropPartition);
-						partition--;
-						dropPartition="drop table if exists "+table+"_"+partition+"_"+parts[parts.length-1];
-						dbw.execute(dropPartition);
-					} catch(NumberFormatException e) {
-						log.error("Error in parsing table partition number, skipping table:"+tableList[0]);
-					} catch(ArrayIndexOutOfBoundsException e) {
-						log.debug("Skipping table:"+tableList[0]+", because it has no partition configuration.");
-					}
-				}
-			}
-		} catch(Exception e) {
-			e.printStackTrace();
-		}		
-	}
-	
-	public static void usage() {
-		System.out.println("DataExpiration usage:");
-		System.out.println("java -jar chukwa-core.jar org.apache.hadoop.chukwa.database.DataExpiration <date> <time window size>");
-		System.out.println("     date format: YYYY-MM-DD");
-		System.out.println("     time window size: 7, 30, 91, 365");		
-	}
-	
-	public static void main(String[] args) {
-		DataExpiration de = new DataExpiration();
-		long now = (new Date()).getTime();
-		long start = now;
-		long end = now;
-		if(args.length==2) {
-			SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
-			try {
-				start = sdf.parse(args[0]).getTime();				
-				end = start + (Long.parseLong(args[1])*1440*60*1000L);
-				de.dropTables(start, end);				
-			} catch(Exception e) {
-				usage();
-			}
-		} else {
-			usage();
-		}
-    }
-}
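
The core of the removed DataExpiration.dropTables() above is string handling on partitioned table names: a name such as system_metrics_2010_week is split on "_", the second-to-last token is the numeric partition index, and the two partitions three and four steps behind it are dropped. A minimal sketch of just that step, with the DatabaseWriter.execute() call replaced by a print and a hypothetical table name, is shown below.

public class DropPartitionSketch {
    public static void main(String[] args) {
        String tl = "system_metrics_2010_week"; // hypothetical partitioned table name

        String[] parts = tl.split("_");
        // Second-to-last token is the partition index, last token is the partition type.
        int partition = Integer.parseInt(parts[parts.length - 2]);
        String suffix = parts[parts.length - 1];

        // Rebuild the base table name from everything before the partition index.
        StringBuilder table = new StringBuilder();
        for (int i = 0; i < parts.length - 2; i++) {
            if (i != 0) {
                table.append("_");
            }
            table.append(parts[i]);
        }

        // The removed code dropped the partitions 3 and 4 steps behind the named one.
        partition = partition - 3;
        System.out.println("drop table if exists " + table + "_" + partition + "_" + suffix);
        partition--;
        System.out.println("drop table if exists " + table + "_" + partition + "_" + suffix);
    }
}

Running this prints DROP statements for system_metrics_2007_week and system_metrics_2006_week, mirroring the retention window the removed code enforced per partition type.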

+ 0 - 244
src/contrib/chukwa/src/java/org/apache/hadoop/chukwa/database/DatabaseConfig.java

@@ -1,244 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.chukwa.database;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.Path;
-import java.util.*;
-
-public class DatabaseConfig {
-    private Configuration config = null;
-    // 100 years in ms; the long literal comes first so the int arithmetic does not overflow.
-    public final static long CENTURY = 36500L * 24 * 60 * 60 * 1000;
-    public final static long DECADE = 3650 * 24 * 60 * 60 * 1000L;
-    public final static long YEAR = 365 * 24 * 60 * 60 * 1000L;
-    // 91.25 days in ms; the usual *1000 factor is folded into 91250.
-    public final static long QUARTER = 91250 * 24 * 60 * 60L;
-    public final static long MONTH = 30 * 24 * 60 * 60 * 1000L;
-    public final static long WEEK = 7 * 24 * 60 * 60 * 1000L;
-    public final static long DAY = 24 * 60 * 60 * 1000L;
-
-    public DatabaseConfig(String path) {
-        Path fileResource = new Path(path);
-        config = new Configuration();
-        config.addResource(fileResource);
-    }
-    public DatabaseConfig() {
-        Path fileResource = new Path(System.getenv("DATACONFIG"));
-        config = new Configuration();
-        config.addResource(fileResource);
-    }
-
-    public String get(String key) {
-        return config.get(key);
-    }
-    public void put(String key, String value) {
-        this.config.set(key, value);
-    }
-    public Iterator<?> iterator() {
-        return this.config.iterator();
-    }
-    public HashMap<String, String> startWith(String key) {
-        HashMap<String, String> transformer = new HashMap<String, String>();
-        Iterator<?> entries = config.iterator();
-        while(entries.hasNext()) {
-           String entry = entries.next().toString();
-           if(entry.startsWith(key)) {
-               String[] metrics = entry.split("=");
-               transformer.put(metrics[0],metrics[1]);
-           }
-        }
-        return transformer;
-    }    
-    public String[] findTableName(String tableName, long start, long end) {
-    	String[] tableNames = null;
-    	String tableType = "_week";
-		long now = (new Date()).getTime();
-		long timeWindow = end - start;
-		long partitionSize=WEEK;
-		boolean fallback=true;
-		
-		if(config.get("consolidator.table."+tableName)==null) {
-			tableNames = new String[1];
-			tableNames[0]=tableName;
-			return tableNames;
-		}
-		
-		if(timeWindow<=0) {
-			timeWindow=1;			
-		}
-		if(timeWindow > DECADE) {
-			tableType = "_century";
-			partitionSize=CENTURY;
-		} else if(timeWindow > YEAR) {
-			tableType = "_decade";
-			partitionSize=DECADE;
-		} else if(timeWindow > QUARTER) {
-			tableType = "_year";
-			partitionSize=YEAR;			
-		} else if(timeWindow > MONTH) {
-			tableType = "_quarter";
-			partitionSize=QUARTER;
-		} else if(timeWindow > WEEK) {
-			tableType = "_month";
-			partitionSize=MONTH;
-		} else {
-			tableType = "_week";
-			partitionSize=WEEK;
-		}
-
-		long currentPartition = now / partitionSize;
-		long startPartition = start / partitionSize;
-		long endPartition = end / partitionSize;
-		while(fallback && partitionSize!=CENTURY*100) {
-			// If the start date is far in the past relative to the current time, fall back to down-sampled data.
-			if(startPartition + 2 < currentPartition) {
-				fallback=true;
-			    if(partitionSize==DAY) {
-				    tableType = "_week";
-				    partitionSize=WEEK;
-			    } else if(partitionSize==WEEK) {
-				    tableType = "_month";
-				    partitionSize=MONTH;
-			    } else if(partitionSize==MONTH) {
-				    tableType = "_year";
-				    partitionSize=YEAR;
-			    } else if(partitionSize==YEAR) {
-					tableType = "_decade";
-					partitionSize=DECADE;				
-				} else if(partitionSize==DECADE) {
-					tableType = "_century";
-					partitionSize=CENTURY;
-				} else {
-					partitionSize=100*CENTURY;
-				}
-				currentPartition = now / partitionSize;
-				startPartition = start / partitionSize;
-				endPartition = end / partitionSize;
-			} else {
-				fallback=false;
-			}
-		}
-
-		if(startPartition!=endPartition) {
-			int delta = (int) (endPartition-startPartition);
-			tableNames=new String[delta+1];
-			for(int i=0;i<=delta;i++) {
-				long partition = startPartition+(long)i;
-				tableNames[i]=tableName+"_"+partition+tableType;
-			}
-		} else {
-			tableNames=new String[1];
-			tableNames[0]=tableName+"_"+startPartition+tableType;
-		}
-    	return tableNames;
-    }
-    public String[] findTableNameForCharts(String tableName, long start, long end) {
-    	String[] tableNames = null;
-    	String tableType = "_week";
-		long now = (new Date()).getTime();
-		long timeWindow = end - start;
-		if(timeWindow>60*60*1000) {
-		    timeWindow = timeWindow + 1;
-		}
-		long partitionSize=WEEK;
-		boolean fallback=true;
-		
-		if(config.get("consolidator.table."+tableName)==null) {
-			tableNames = new String[1];
-			tableNames[0]=tableName;
-			return tableNames;
-		}
-		
-		if(timeWindow<=0) {
-			timeWindow=1;			
-		}
-		if(timeWindow > YEAR) {
-			tableType = "_century";
-			partitionSize=CENTURY;			
-		} else if(timeWindow > QUARTER) {
-			tableType = "_century";
-			partitionSize=CENTURY;
-		} else if(timeWindow > MONTH) {
-			tableType = "_decade";
-			partitionSize=DECADE;
-		} else if(timeWindow > WEEK) {
-			tableType = "_year";
-			partitionSize=YEAR;
-		} else if(timeWindow > DAY) {
-			tableType = "_quarter";
-			partitionSize=QUARTER;
-		} else if(timeWindow > 60*60*1000) {
-			tableType = "_month";
-			partitionSize=MONTH;			
-		} else {
-			tableType = "_week";
-			partitionSize = WEEK;
-		}
-
-		long currentPartition = now / partitionSize;
-		long startPartition = start / partitionSize;
-		long endPartition = end / partitionSize;
-		while(fallback && partitionSize!=DECADE*100) {
-			// If the start date is far in the past relative to the current time, fall back to down-sampled data.
-			if(startPartition + 2 < currentPartition) {
-				fallback=true;
-			    if(partitionSize==DAY) {
-				    tableType = "_month";
-				    partitionSize=MONTH;
-			    } else if(partitionSize==WEEK) {
-				    tableType = "_quarter";
-				    partitionSize=QUARTER;
-			    } else if(partitionSize==MONTH) {
-				    tableType = "_year";
-				    partitionSize=YEAR;
-			    } else if(partitionSize==YEAR) {
-					tableType = "_decade";
-					partitionSize=DECADE;				
-				} else {
-					partitionSize=CENTURY;
-				}
-				currentPartition = now / partitionSize;
-				startPartition = start / partitionSize;
-				endPartition = end / partitionSize;
-			} else {
-				fallback=false;
-			}
-		}
-
-		if(startPartition!=endPartition) {
-			int delta = (int) (endPartition-startPartition);
-			tableNames=new String[delta+1];
-			for(int i=0;i<=delta;i++) {
-				long partition = startPartition+(long)i;
-				tableNames[i]=tableName+"_"+partition+tableType;
-			}
-		} else {
-			tableNames=new String[1];
-			tableNames[0]=tableName+"_"+startPartition+tableType;
-		}
-    	return tableNames;
-    }
-    
-    public static void main(String[] args) {
-    	DatabaseConfig dbc = new DatabaseConfig();
-    	String[] names = dbc.findTableName("system_metrics",1216140020000L,1218645620000L);
-    	for(String n: names) {
-    		System.out.println("name:"+n);
-    	}
-    }
-}
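
The partition arithmetic behind findTableName() above is straightforward in the common case: divide the start and end timestamps by the partition size to get partition indices, then emit one <table>_<partition><type> name per index. The sketch below isolates that arithmetic for weekly partitions, reusing the table name and timestamps from the removed main() method; it deliberately omits the fallback to coarser (down-sampled) tables that the real method applies when the start date is far in the past.

public class PartitionNameSketch {
    private static final long WEEK = 7 * 24 * 60 * 60 * 1000L; // one week in milliseconds

    // Mirrors DatabaseConfig.findTableName() for the simple case where no
    // fallback to a coarser partition type is triggered.
    public static String[] weeklyTableNames(String tableName, long start, long end) {
        long startPartition = start / WEEK;
        long endPartition = end / WEEK;
        int delta = (int) (endPartition - startPartition);
        String[] names = new String[delta + 1];
        for (int i = 0; i <= delta; i++) {
            names[i] = tableName + "_" + (startPartition + i) + "_week";
        }
        return names;
    }

    public static void main(String[] args) {
        // Same sample range as the removed DatabaseConfig.main().
        for (String n : weeklyTableNames("system_metrics", 1216140020000L, 1218645620000L)) {
            System.out.println("name:" + n);
        }
    }
}

With these inputs the sketch yields system_metrics_2010_week through system_metrics_2014_week; the real method would return coarser tables once its two-partition fallback check kicks in.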

+ 0 - 159
src/contrib/chukwa/src/java/org/apache/hadoop/chukwa/database/MetricsAggregation.java

@@ -1,159 +0,0 @@
-package org.apache.hadoop.chukwa.database;
-
-import java.sql.Connection;
-import java.sql.DatabaseMetaData;
-import java.sql.DriverManager;
-import java.sql.ResultSet;
-import java.sql.SQLException;
-import java.sql.Statement;
-import java.text.SimpleDateFormat;
-import java.util.ArrayList;
-import java.util.Date;
-import java.util.List;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-
-public class MetricsAggregation
-{
-	 private static Log log = LogFactory.getLog(MetricsAggregation.class);
-	 private static Connection conn = null;    
-     private static Statement stmt = null; 
-     private static ResultSet rs = null; 
-     private static DatabaseConfig mdlConfig;
-     
-	/**
-	 * @param args
-	 * @throws SQLException 
-	 */
-	public static void main(String[] args) throws SQLException
-	{
-	       mdlConfig = new DatabaseConfig();
-		
-	       // Connect to the database
-	       String jdbc_url = System.getenv("JDBC_URL_PREFIX")+mdlConfig.get("jdbc.host")+"/"+mdlConfig.get("jdbc.db");
-	       if(mdlConfig.get("jdbc.user")!=null) {
-	           jdbc_url = jdbc_url + "?user=" + mdlConfig.get("jdbc.user");
-	           if(mdlConfig.get("jdbc.password")!=null) {
-	               jdbc_url = jdbc_url + "&password=" + mdlConfig.get("jdbc.password");
-	           }
-	       }
-	       try {
-	           // The newInstance() call is a work around for some
-	           // broken Java implementations
-                   String jdbcDriver = System.getenv("JDBC_DRIVER");
-	           Class.forName(jdbcDriver).newInstance();
-	           log.info("Initialized JDBC URL: "+jdbc_url);
-	       } catch (Exception ex) {
-	           // handle the error
-	    	   ex.printStackTrace();
-	           log.error(ex,ex);
-	       }
-	       try {
-	           conn = DriverManager.getConnection(jdbc_url);
-	       } catch (SQLException ex) 
-	       {
-	    	   ex.printStackTrace();
-	           log.error(ex,ex);
-	       }      
-	       
-	       // get the latest timestamp for aggregation on this table
-		   // Start = latest
-	       
-	      
-	       
-	       SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
-	        
-	       long start = System.currentTimeMillis() - (1000*60*60*24);
-	       long end = System.currentTimeMillis() - (1000*60*10);
-	       // retrieve metadata for cluster_system_metrics
-	       DatabaseConfig dbConf = new DatabaseConfig();
-	       String[] tables = dbConf.findTableName("cluster_system_metrics_2018_week", start, end);
-	       for(String table: tables)
-	       {
-	    	   System.out.println("Table to aggregate per Ts: " + table);
-	    	   stmt = conn.createStatement();
-	    	   rs = stmt.executeQuery("select table_ts from aggregation_admin_table where table_name=\"" 
-	    			   + table + "\"");
-			   if (rs.next())
-			   {
-				   start = rs.getLong(1);
-			   }
-			   else
-			   {
-				   start = 0;
-			   }
-			   
-			   end = start + (1000*60*60*1); // do 1 hour aggregation max 
-			   long now = System.currentTimeMillis();
-			   now = now - (1000*60*10); // wait for 10 minutes
-			   end = Math.min(now, end);
-		     
-			   // TODO REMOVE DEBUG ONLY!
-			   end = now;
-			   
-			   System.out.println("Start Date:" + new Date(start));
-			   System.out.println("End Date:" + new Date(end));
-			   
-		       DatabaseMetaData dbm = conn.getMetaData ();
-		       rs = dbm.getColumns ( null,null,table, null);
-		      	
-		       List<String> cols = new ArrayList<String>();
-		       while (rs.next ())
-		       {
-		          	String s = rs.getString (4); // 4 is column name, 5 data type etc. 
-		          	System.out.println ("Name: " + s);
-		          	int type = rs.getInt(5);
-		          	if (type == java.sql.Types.VARCHAR)
-		          	{
-		          		System.out.println("Type: Varchar " + type);
-		          	}
-		          	else
-		          	{
-		          		cols.add(s);
-		          		System.out.println("Type: Number " + type);
-		          	}
-		       }// end of while.
-		       
-		       // build insert into from select query
-		       String initTable = table.replace("cluster_", "");
-		       StringBuilder sb0 = new StringBuilder();
-		       StringBuilder sb = new StringBuilder();
-		       sb0.append("insert into ").append(table).append(" (");
-		       sb.append(" ( select ");
-		       for (int i=0;i<cols.size();i++)
-		       {
-		    	   sb0.append(cols.get(i));
-		    	   sb.append("avg(").append(cols.get(i)).append(") ");
-		    	   if (i< cols.size()-1)
-		    	   {
-		    		   sb0.append(",");
-		    		   sb.append(",");
-		    	   }
-		       }
-			   sb.append(" from ").append(initTable);
-			   sb.append(" where timestamp between \"");
-			   sb.append(formatter.format(start));
-			   sb.append("\" and \"").append(formatter.format(end));
-			   sb.append("\" group by timestamp  )");
-			  
-		        
-			   // close fields
-			   sb0.append(" )").append(sb);
-			   System.out.println(sb0.toString());
-			   
-			   // run query
-			   conn.setAutoCommit(false);
-			   stmt = conn.createStatement();
-			   stmt.execute(sb0.toString());
-			   
-			   // update last run
-			   stmt = conn.createStatement();
-			   stmt.execute("update aggregation_admin_table set table_ts=\"" + formatter.format(end) +
-					   "\" where table_name=\"" + table + "\"");
-			   conn.commit();
-	       }
-	
-	}
-
-}
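
The heart of the removed MetricsAggregation job above is the generated insert-into-select statement: the numeric columns of the per-node table are averaged per timestamp and written into the matching cluster_ table. The sketch below isolates that query-building step; the column names and time bounds are hypothetical placeholders for what the removed code read from DatabaseMetaData.getColumns() and aggregation_admin_table.

import java.util.Arrays;
import java.util.List;

public class AggregationQuerySketch {
    public static void main(String[] args) {
        // Target table name taken from the removed code; columns and bounds are hypothetical.
        String table = "cluster_system_metrics_2018_week";
        List<String> cols = Arrays.asList("load_1", "cpu_user", "mem_used");
        String start = "2008-07-15 00:00:00";
        String end = "2008-07-15 01:00:00";

        // The source table is the same name without the "cluster_" prefix.
        String initTable = table.replace("cluster_", "");

        StringBuilder sb0 = new StringBuilder();
        StringBuilder sb = new StringBuilder();
        sb0.append("insert into ").append(table).append(" (");
        sb.append(" ( select ");
        for (int i = 0; i < cols.size(); i++) {
            sb0.append(cols.get(i));
            sb.append("avg(").append(cols.get(i)).append(") ");
            if (i < cols.size() - 1) {
                sb0.append(",");
                sb.append(",");
            }
        }
        sb.append(" from ").append(initTable);
        sb.append(" where timestamp between \"").append(start);
        sb.append("\" and \"").append(end);
        sb.append("\" group by timestamp )");
        sb0.append(" )").append(sb);

        // Prints the same shape of statement the removed code handed to Statement.execute().
        System.out.println(sb0.toString());
    }
}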

Some files were not shown because too many files changed in this diff