diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..8ea2d42
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,4 @@
+.idea
+db.sh
+dr.sh
+ds.sh
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..4d03fcc
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,46 @@
+# Single-node Apache Accumulo 1.5.2 and ZooKeeper 3.4.6 on top of the sequenceiq Hadoop image.
+# NOTE(review): the base image is untagged, so every build tracks its current `latest`;
+# pin a tag or digest once the intended Hadoop version is confirmed.
+FROM sequenceiq/hadoop-docker
+
+LABEL maintainer="@mraad"
+
+USER root
+
+ENV PATH=$PATH:$HADOOP_PREFIX/bin
+
+RUN chown -R root:root $HADOOP_PREFIX
+
+# Append soft and hard open-file limits of 65536 for all users (printf, so it does not depend on the shell's echo -e).
+RUN printf '\n* soft nofile 65536\n* hard nofile 65536\n' >> /etc/security/limits.conf
+
+# ZooKeeper: fetched from archive.apache.org because mirrors only carry current releases.
+# -f makes curl fail on HTTP errors instead of piping an error page into tar.
+RUN curl -fsSL http://archive.apache.org/dist/zookeeper/zookeeper-3.4.6/zookeeper-3.4.6.tar.gz | tar -xz -C /usr/local
+# Steps are chained with && so a failing step aborts the build instead of being ignored.
+RUN ln -s /usr/local/zookeeper-3.4.6 /usr/local/zookeeper && \
+    chown -R root:root /usr/local/zookeeper-3.4.6 && \
+    mkdir -p /var/zookeeper
+ENV ZOOKEEPER_HOME=/usr/local/zookeeper
+ENV PATH=$PATH:$ZOOKEEPER_HOME/bin
+COPY zookeeper/* $ZOOKEEPER_HOME/conf/
+
+# Accumulo
+RUN curl -fsSL http://archive.apache.org/dist/accumulo/1.5.2/accumulo-1.5.2-bin.tar.gz | tar -xz -C /usr/local
+RUN ln -s /usr/local/accumulo-1.5.2 /usr/local/accumulo && \
+    chown -R root:root /usr/local/accumulo-1.5.2
+ENV ACCUMULO_HOME=/usr/local/accumulo
+ENV PATH=$PATH:$ACCUMULO_HOME/bin
+COPY accumulo/* $ACCUMULO_HOME/conf/
+
+# start-all.sh / stop-all.sh helpers, usable by root only.
+COPY *-all.sh /etc/
+RUN chown root:root /etc/*-all.sh && \
+    chmod 700 /etc/*-all.sh
+
+# Run the one-off instance initialisation while the image is being built.
+COPY init-accumulo.sh /tmp/
+RUN /tmp/init-accumulo.sh
+
+# 2181: ZooKeeper clientPort; 50095: Accumulo web UI; 9000: presumably the HDFS NameNode RPC port - TODO confirm.
+EXPOSE 2181 9000 50095
diff --git a/README.md b/README.md
index 631c44f..1b13605 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,101 @@
-# accumulo-docker
-Run a single node Accumulo instance
+# Single Node [Accumulo](https://accumulo.apache.org/) Instance On Docker
+
+This work is based on [https://github.com/medined/docker-accumulo](https://github.com/medined/docker-accumulo) - Thanks :-)
+
+If you are using [boot2docker](http://boot2docker.io/) you might want to up the memory and storage space.
+ +```shell +boot2docker init -m 8192 -s 32768 +``` + +On Windows, the `C:` drive is mounted on the Linux host as `/c`. Copy this folder onto your `C:` drive so you can `cd /c/accumulo-docker` + +### vm.swappiness and docker + +The `vm.swappiness` system parameter has to be set in the docker host OS to be inherited by the Accumulo container. + +If you are using boot2docker, run `boot2docker ssh` to log in to the host OS. + +```shell +sudo sysctl -w vm.swappiness=0 +sudo sysctl -w net.ipv6.conf.lo.disable_ipv6=1 +sudo sysctl -w net.ipv6.conf.all.disable_ipv6=1 +sudo sysctl -w net.ipv6.conf.default.disable_ipv6=1 +``` + +Check the value using: +```shell +sysctl vm.swappiness +``` + +### Build the container image + +```shell +docker build -t mraad/accumulo . +``` + +### Run the container + +```shell +docker run --name accumulo -i -t -P mraad/accumulo /bin/bash +``` + +### Start Zookeeper, YARN, HDFS and Accumulo + +```shell +/etc/start-all.sh +``` + +### Stop Accumulo, HDFS, YARN and Zookeeper + +```shell +/etc/stop-all.sh +``` + +### See all exposed ports + +```shell +docker port accumulo | sort -t / -n +``` + + +In this sample line `50070/tcp -> 0.0.0.0:49161`, the internal port `50070` is mapped to `49161` on the host OS. 
+ +If you are using boot2docker, get the host OS IP using `boot2docker ip` + +SERVICE |URL | +---------|--------------------------------| +YARN | http://docker-ip:exposed-8088 | +HDFS | http://docker-ip:exposed-50070 | +ACCUMULO | http://docker-ip:exposed-50095 | + + +### Sample Accumulo session in the container + +```shell +bash-4.1# accumulo shell -u root -p secret + +Shell - Apache Accumulo Interactive Shell +- +- version: 1.5.2 +- instance name: accumulo +- instance id: 57fdffe2-5a38-48dd-934f-5d2db507027d +- +- type 'help' for a list of available commands +- +root@accumulo> createtable mytable +root@accumulo mytable> tables +!METADATA +mytable +trace +root@accumulo mytable> insert row1 colf colq value1 +root@accumulo mytable> scan +row1 colf:colq [] value1 +root@accumulo mytable> exit +``` + +### Extra References + +* http://stackoverflow.com/questions/25767224/change-swappiness-for-docker-container +* http://en.wikipedia.org/wiki/Swappiness +* http://www.incrediblemolk.com/sharing-a-windows-folder-with-the-boot2docker-vm/ diff --git a/accumulo/accumulo-env.sh b/accumulo/accumulo-env.sh new file mode 100644 index 0000000..690d795 --- /dev/null +++ b/accumulo/accumulo-env.sh @@ -0,0 +1,57 @@ +#! /usr/bin/env bash + +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+###
+### Configure these environment variables to point to your local installations.
+###
+### The functional tests require conditional values, so keep this style:
+###
+### test -z "$JAVA_HOME" && export JAVA_HOME=/usr/local/lib/jdk-1.6.0
+###
+###
+### Note that the -Xmx -Xms settings below require substantial free memory:
+### you may want to use smaller values, especially when running everything
+### on a single machine.
+###
+
+if [ -z "$HADOOP_HOME" ]
+then
+   test -z "$HADOOP_PREFIX" && export HADOOP_PREFIX=/usr/local/hadoop
+else
+   export HADOOP_PREFIX="$HADOOP_HOME"   # exported so child processes see it, as in the other branch
+   unset HADOOP_HOME
+fi
+test -z "$HADOOP_CONF_DIR" && export HADOOP_CONF_DIR="$HADOOP_PREFIX/etc/hadoop"
+test -z "$JAVA_HOME" && export JAVA_HOME=/usr/java/default
+test -z "$ZOOKEEPER_HOME" && export ZOOKEEPER_HOME=/usr/local/zookeeper
+test -z "$ACCUMULO_HOME" && export ACCUMULO_HOME=/usr/local/accumulo
+test -z "$ACCUMULO_LOG_DIR" && export ACCUMULO_LOG_DIR="$ACCUMULO_HOME/logs"
+if [ -f "${ACCUMULO_CONF_DIR}/accumulo.policy" ]   # quoted so a path containing spaces stays a single argument
+then
+   POLICY="-Djava.security.manager -Djava.security.policy=${ACCUMULO_CONF_DIR}/accumulo.policy"
+fi
+test -z "$ACCUMULO_TSERVER_OPTS" && export ACCUMULO_TSERVER_OPTS="${POLICY} -Xmx1g -Xms384m"
+test -z "$ACCUMULO_MASTER_OPTS" && export ACCUMULO_MASTER_OPTS="${POLICY} -Xmx1g -Xms128m"
+test -z "$ACCUMULO_MONITOR_OPTS" && export ACCUMULO_MONITOR_OPTS="${POLICY} -Xmx1g -Xms64m"
+test -z "$ACCUMULO_GC_OPTS" && export ACCUMULO_GC_OPTS="-Xmx1g -Xms64m"
+test -z "$ACCUMULO_GENERAL_OPTS" && export ACCUMULO_GENERAL_OPTS="-XX:+UseConcMarkSweepGC -XX:CMSInitiatingOccupancyFraction=75 -Djava.net.preferIPv4Stack=true"
+test -z "$ACCUMULO_OTHER_OPTS" && export ACCUMULO_OTHER_OPTS="-Xmx1g -Xms64m"
+# what to do when the JVM runs out of heap memory
+export ACCUMULO_KILL_CMD='kill -9 %p'
+
+# Should the monitor bind to all network interfaces -- default: false
+# export 
ACCUMULO_MONITOR_BIND_ALL="true" diff --git a/accumulo/accumulo-site-template.xml b/accumulo/accumulo-site-template.xml new file mode 100644 index 0000000..1f5eab1 --- /dev/null +++ b/accumulo/accumulo-site-template.xml @@ -0,0 +1,159 @@ + + + + + + + + instance.zookeeper.host + HOSTNAME:2181 + comma separated list of zookeeper servers + + + + logger.dir.walog + /var/lib/accumulo/walogs + The directory used to store write-ahead logs on the + local filesystem. It is possible to specify a comma-separated list + of directories. + + + + + instance.secret + secret + + + + + + trace.token.property.password + + secret + + + + + tserver.cache.index.size + 128M + + + + tserver.memory.maps.max + 1G + + + + tserver.cache.data.size + 128M + + + + crypto.cipher.algorithm.name + AES + + + table.cache.block.enable + true + + + crypto.module.class + org.apache.accumulo.core.security.crypto.DefaultCryptoModule + + + crypto.cipher.suite + AES/CFB/NoPadding + + + trace.user + root + + + + table.cache.index.enable + true + + + crypto.secure.rng.provider + SUN + + + crypto.cipher.key.length + 128 + + + crypto.secure.rng + SHA1PRNG + + + crypto.default.key.strategy.cipher.suite + AES/ECB/NoPadding + + + + general.classpaths + + $HADOOP_CONF_DIR, + $ACCUMULO_HOME/server/target/classes/, + $ACCUMULO_HOME/lib/accumulo-server.jar, + $ACCUMULO_HOME/core/target/classes/, + $ACCUMULO_HOME/lib/accumulo-core.jar, + $ACCUMULO_HOME/start/target/classes/, + $ACCUMULO_HOME/lib/accumulo-start.jar, + $ACCUMULO_HOME/fate/target/classes/, + $ACCUMULO_HOME/lib/accumulo-fate.jar, + $ACCUMULO_HOME/proxy/target/classes/, + $ACCUMULO_HOME/lib/accumulo-proxy.jar, + $ACCUMULO_HOME/lib/[^.].*.jar, + $ZOOKEEPER_HOME/zookeeper[^.].*.jar, + + $HADOOP_PREFIX/share/hadoop/common/.*.jar, + $HADOOP_PREFIX/share/hadoop/common/lib/.*.jar, + $HADOOP_PREFIX/share/hadoop/hdfs/.*.jar, + $HADOOP_PREFIX/share/hadoop/mapreduce/.*.jar, + $HADOOP_PREFIX/share/hadoop/yarn/.*.jar, + $HADOOP_PREFIX/share/hadoop/hdfs/lib/.*.jar, + 
$HADOOP_PREFIX/share/hadoop/mapreduce/lib/.*.jar, + $HADOOP_PREFIX/share/hadoop/yarn/lib/.*.jar + + /usr/local/hadoop/[^.].*.jar, + /usr/local/hadoop/lib/[^.].*.jar, + /usr/local/hadoop-hdfs/[^.].*.jar, + /usr/local/hadoop-hdfs/lib/[^.].*.jar, + /usr/local/hadoop-yarn/[^.].*.jar, + /usr/local/hadoop-yarn/lib/[^.].*.jar, + /usr/local/hadoop-mapreduce/[^.].*.jar, + /usr/local/hadoop-mapreduce/lib/*.jar + + + diff --git a/accumulo/configuration.xsl b/accumulo/configuration.xsl new file mode 100644 index 0000000..377cdbe --- /dev/null +++ b/accumulo/configuration.xsl @@ -0,0 +1,24 @@ + + + + + + + + + + + + + + + + + + + +
namevaluedescription
+ + +
+
diff --git a/accumulo/gc b/accumulo/gc new file mode 100644 index 0000000..e216b19 --- /dev/null +++ b/accumulo/gc @@ -0,0 +1 @@ +HOSTNAME diff --git a/accumulo/generic_logger.xml b/accumulo/generic_logger.xml new file mode 100644 index 0000000..ead2b88 --- /dev/null +++ b/accumulo/generic_logger.xml @@ -0,0 +1,87 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/accumulo/log4j.properties b/accumulo/log4j.properties new file mode 100644 index 0000000..a4bcb2e --- /dev/null +++ b/accumulo/log4j.properties @@ -0,0 +1,41 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# default logging properties: +# by default, log everything at INFO or higher to the console +log4j.rootLogger=INFO,A1 + +# hide Jetty junk +log4j.logger.org.mortbay.log=WARN,A1 + +# hide "Got brand-new compresssor" messages +log4j.logger.org.apache.hadoop.io.compress=WARN,A1 + +# hide junk from TestRandomDeletes +log4j.logger.org.apache.accumulo.test.TestRandomDeletes=WARN,A1 + +# hide junk from VFS +log4j.logger.org.apache.commons.vfs2.impl.DefaultFileSystemManager=WARN,A1 + +# hide almost everything from zookeeper +log4j.logger.org.apache.zookeeper=ERROR,A1 + +# hide AUDIT messages in the shell, alternatively you could send them to a different logger +log4j.logger.org.apache.accumulo.core.util.shell.Shell.audit=WARN,A1 + +# Send most things to the console +log4j.appender.A1=org.apache.log4j.ConsoleAppender +log4j.appender.A1.layout.ConversionPattern=%d{ISO8601} [%-8c{2}] %-5p: %m%n +log4j.appender.A1.layout=org.apache.log4j.PatternLayout diff --git a/accumulo/masters b/accumulo/masters new file mode 100644 index 0000000..e216b19 --- /dev/null +++ b/accumulo/masters @@ -0,0 +1 @@ +HOSTNAME diff --git a/accumulo/monitor b/accumulo/monitor new file mode 100644 index 0000000..e216b19 --- /dev/null +++ b/accumulo/monitor @@ -0,0 +1 @@ +HOSTNAME diff --git a/accumulo/monitor_logger.xml b/accumulo/monitor_logger.xml new file mode 100644 index 0000000..3a63bf4 --- /dev/null +++ b/accumulo/monitor_logger.xml @@ -0,0 +1,64 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/accumulo/slaves b/accumulo/slaves new file mode 100644 index 0000000..e216b19 --- /dev/null +++ b/accumulo/slaves @@ -0,0 +1 @@ +HOSTNAME diff --git a/accumulo/tracers b/accumulo/tracers new file mode 100644 index 0000000..e216b19 --- /dev/null +++ b/accumulo/tracers @@ -0,0 +1 @@ +HOSTNAME diff --git a/init-accumulo.sh b/init-accumulo.sh new file mode 100755 index 0000000..83a79d4 --- /dev/null +++ b/init-accumulo.sh @@ -0,0 +1,39 @@ 
+#!/bin/bash
+# Build-time bootstrap: start the stack, run 'accumulo init' once, then stop everything again.
+: ${HADOOP_PREFIX:=/usr/local/hadoop}
+
+# NOTE(review): this runs hadoop-env.sh as a child process, so nothing it sets reaches this shell - confirm sourcing was not intended.
+$HADOOP_PREFIX/etc/hadoop/hadoop-env.sh
+
+# -f: having no stale pid file to remove is not an error
+rm -f /tmp/*.pid
+
+# installing libraries if any - (resource urls added comma separated to the ACP system variable)
+cd $HADOOP_PREFIX/share/hadoop/common ; for cp in ${ACP//,/ }; do echo == $cp; curl -LO $cp ; done; cd -
+
+sed "s/HOSTNAME/$HOSTNAME/g" /usr/local/hadoop/etc/hadoop/core-site.xml.template > /usr/local/hadoop/etc/hadoop/core-site.xml
+sed "s/HOSTNAME/$HOSTNAME/g" /usr/local/accumulo/conf/accumulo-site-template.xml > /usr/local/accumulo/conf/accumulo-site.xml
+
+# Every Accumulo role file names this host.
+for role in gc masters monitor slaves tracers; do
+    echo "$HOSTNAME" > "/usr/local/accumulo/conf/$role"
+done
+
+service sshd start
+$ZOOKEEPER_HOME/bin/zkServer.sh start
+
+$HADOOP_PREFIX/sbin/start-dfs.sh
+$HADOOP_PREFIX/bin/hdfs dfsadmin -safemode wait
+$HADOOP_PREFIX/sbin/start-yarn.sh
+
+# Keep the init result so a failed init fails the image build, but only after the services are stopped.
+$ACCUMULO_HOME/bin/accumulo init --instance-name accumulo --password secret
+init_status=$?
+
+$HADOOP_PREFIX/etc/hadoop/hadoop-env.sh
+$HADOOP_PREFIX/sbin/stop-yarn.sh
+$HADOOP_PREFIX/sbin/stop-dfs.sh
+$ZOOKEEPER_HOME/bin/zkServer.sh stop
+service sshd stop
+
+exit $init_status
diff --git a/start-all.sh b/start-all.sh
new file mode 100755
index 0000000..ef19c2c
--- /dev/null
+++ b/start-all.sh
@@ -0,0 +1,30 @@
+#!/bin/bash
+# Runtime start-up: regenerate the host-specific configuration for this container,
+# then start sshd, ZooKeeper, HDFS, YARN and Accumulo in that order.
+
+: ${HADOOP_PREFIX:=/usr/local/hadoop}
+
+# NOTE(review): this runs hadoop-env.sh as a child process, so nothing it sets reaches this shell - confirm sourcing was not intended.
+$HADOOP_PREFIX/etc/hadoop/hadoop-env.sh
+
+# -f: having no stale pid file to remove is not an error
+rm -f /tmp/*.pid
+
+# installing libraries if any - (resource urls added comma separated to the ACP system variable)
+cd $HADOOP_PREFIX/share/hadoop/common ; for cp in ${ACP//,/ }; do echo == $cp; curl -LO $cp ; done; cd -
+
+sed "s/HOSTNAME/$HOSTNAME/g" /usr/local/hadoop/etc/hadoop/core-site.xml.template > /usr/local/hadoop/etc/hadoop/core-site.xml
+
+sed "s/HOSTNAME/$HOSTNAME/g" /usr/local/accumulo/conf/accumulo-site-template.xml > /usr/local/accumulo/conf/accumulo-site.xml
+
+# Every Accumulo role file names this host.
+for role in gc masters monitor slaves tracers; do
+    echo "$HOSTNAME" > "/usr/local/accumulo/conf/$role"
+done
+
+service sshd start
+$ZOOKEEPER_HOME/bin/zkServer.sh start
+$HADOOP_PREFIX/sbin/start-dfs.sh
+$HADOOP_PREFIX/bin/hdfs dfsadmin -safemode wait
+$HADOOP_PREFIX/sbin/start-yarn.sh
+$ACCUMULO_HOME/bin/start-all.sh
diff --git a/stop-all.sh b/stop-all.sh
new file mode 100755
index 0000000..b36ad34
--- /dev/null
+++ b/stop-all.sh
@@ -0,0 +1,12 @@
+#!/bin/bash
+# Stops Accumulo, YARN, HDFS, ZooKeeper and sshd, in that order.
+
+: ${HADOOP_PREFIX:=/usr/local/hadoop}
+
+$ACCUMULO_HOME/bin/stop-all.sh
+$HADOOP_PREFIX/etc/hadoop/hadoop-env.sh
+$HADOOP_PREFIX/sbin/stop-yarn.sh
+$HADOOP_PREFIX/sbin/stop-dfs.sh
+$ZOOKEEPER_HOME/bin/zkServer.sh stop
+
+service sshd stop
diff --git a/zookeeper/log4j.properties b/zookeeper/log4j.properties
new file mode 100644
index 0000000..a9dfcef
--- /dev/null
+++ b/zookeeper/log4j.properties
@@ -0,0 +1,58 @@
+# Define some default values that can be overridden by system properties
+zookeeper.root.logger=WARN, CONSOLE
+zookeeper.console.threshold=WARN
+zookeeper.log.dir=.
+zookeeper.log.file=zookeeper.log
+zookeeper.log.threshold=DEBUG
+zookeeper.tracelog.dir=.
+zookeeper.tracelog.file=zookeeper_trace.log + +# +# ZooKeeper Logging Configuration +# + +# Format is "<default threshold> (, <appender>)+ + +# DEFAULT: console appender only +log4j.rootLogger=${zookeeper.root.logger} + +# Example with rolling log file +#log4j.rootLogger=DEBUG, CONSOLE, ROLLINGFILE + +# Example with rolling log file and tracing +#log4j.rootLogger=TRACE, CONSOLE, ROLLINGFILE, TRACEFILE + +# +# Log messages at or above zookeeper.console.threshold to the console +# +log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender +log4j.appender.CONSOLE.Threshold=${zookeeper.console.threshold} +log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout +log4j.appender.CONSOLE.layout.ConversionPattern=%d{ISO8601} [myid:%X{myid}] - %-5p [%t:%C{1}@%L] - %m%n + +# +# Add ROLLINGFILE to rootLogger to get log file output +# Log messages at or above zookeeper.log.threshold to a log file +log4j.appender.ROLLINGFILE=org.apache.log4j.RollingFileAppender +log4j.appender.ROLLINGFILE.Threshold=${zookeeper.log.threshold} +log4j.appender.ROLLINGFILE.File=${zookeeper.log.dir}/${zookeeper.log.file} + +# Max log file size of 10MB +log4j.appender.ROLLINGFILE.MaxFileSize=10MB +# uncomment the next line to limit number of backup files +#log4j.appender.ROLLINGFILE.MaxBackupIndex=10 + +log4j.appender.ROLLINGFILE.layout=org.apache.log4j.PatternLayout +log4j.appender.ROLLINGFILE.layout.ConversionPattern=%d{ISO8601} [myid:%X{myid}] - %-5p [%t:%C{1}@%L] - %m%n + + +# +# Add TRACEFILE to rootLogger to get trace log file output +# Log TRACE level and above messages to a trace log file +log4j.appender.TRACEFILE=org.apache.log4j.FileAppender +log4j.appender.TRACEFILE.Threshold=TRACE +log4j.appender.TRACEFILE.File=${zookeeper.tracelog.dir}/${zookeeper.tracelog.file} + +log4j.appender.TRACEFILE.layout=org.apache.log4j.PatternLayout +### Notice we are including log4j's NDC here (%x) +log4j.appender.TRACEFILE.layout.ConversionPattern=%d{ISO8601} [myid:%X{myid}] - %-5p [%t:%C{1}@%L][%x] - %m%n diff --git a/zookeeper/zoo.cfg b/zookeeper/zoo.cfg new file mode 100644 
index 0000000..0b80c4d --- /dev/null +++ b/zookeeper/zoo.cfg @@ -0,0 +1,6 @@ +tickTime=2000 +initLimit=10 +syncLimit=5 +dataDir=/var/zookeeper +clientPort=2181 +maxClientCnxns=100