Followings steps are for: Ubuntu 14.04 LTS, Hadoop-2.6.0
Prerequisites:
- Ubuntu 14.04
- Java JDK 1.7
- Maven 3.0.5
- Hadoop 2.6.0
Install Oozie
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
| # install Maven
sudo apt-get update
sudo apt-get install maven
# build Oozie
wget http://mirrors.whoishostingthis.com/apache/oozie/4.1.0/oozie-4.1.0.tar.gz
tar -xzvf oozie-4.1.0.tar.gz
cd oozie-4.1.0
bin/mkdistro.sh -DskipTests -Dhadoopversion=2.6.0
# copy the binary distribution
cp -R distro/target/oozie-4.1.0-distro/oozie-4.1.0/* ~/oozie-4.1
# edit /etc/profile
sudo vim /etc/profile
export OOZIE_VERSION=4.1.0
export OOZIE_HOME=/home/anggao/oozie-4.1
export PATH=$PATH:$OOZIE_HOME/bin
source /etc/profile
# enable web console for Oozie
# we need ext-*.*.zip library and extjs
cd $OOZIE_HOME
mkdir libext
cp ../oozie-4.1.0/hadooplibs/target/oozie-4.1.0-hadooplibs.tar.gz .
tar -xzvf oozie-4.1.0-hadooplibs.tar.gz
cp oozie-4.1.0/hadooplibs/hadooplib-2.3.0.oozie-4.1.0/* libext
cd libext/
wget http://archive.cloudera.com/gplextras/misc/ext-2.2.zip
|
1
2
3
4
5
6
7
8
9
10
11
12
| # Configure the Hadoop cluster with proxyuser for the Oozie process.
# The following two properties are required in Hadoop core-site.xml:
<!-- OOZIE -->
<property>
<name>hadoop.proxyuser.anggao.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.anggao.groups</name>
<value>*</value>
</property>
|
1
2
3
4
5
6
7
8
9
10
| # Prepare oozie war file
oozie-setup.sh prepare-war
# Create Sharelib Directory on HDFS
# first get HDFS info
hdfs getconf -confKey fs.defaultFS
# use the info obtained above
oozie-setup.sh sharelib create -fs hdfs://YARN001:8020
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
| # use the info obtained above
oozie-setup.sh sharelib create -fs hdfs://YARN001:8020
# update oozie-site.xml under OOZIE_CONF_DIR
<property>
<name>oozie.service.HadoopAccessorService.hadoop.configurations</name>
<value>*=/home/anggao/hadoop-2.6.0/etc/hadoop/</value>
<description>
Comma separated AUTHORITY=HADOOP_CONF_DIR, where AUTHORITY is the HOST:PORT of
the Hadoop service (JobTracker, HDFS). The wildcard '*' configuration is
used when there is no exact match for an authority. The HADOOP_CONF_DIR contains
the relevant Hadoop *-site.xml files. If the path is relative is looked within
the Oozie configuration directory; though the path can be absolute (i.e. to point
to Hadoop client conf/ directories in the local filesystem.
</description>
</property>
<property>
<name>oozie.service.WorkflowAppService.system.libpath</name>
<value>hdfs:///user/${user.name}/share/lib</value>
<description>
System library path to use for workflow applications.
This path is added to workflow application if their job properties sets
the property 'oozie.use.system.libpath' to true.
</description>
</property>
|
1
2
3
4
5
6
7
8
9
10
11
| # Create oozie database
ooziedb.sh create -sqlfile oozie.sql -run
# Start Oozie Service
oozied.sh start
# Verify status of Oozie service
oozie admin --oozie http://localhost:11000/oozie -status
# Web UI
http://localhost:11000/oozie/
|