App-Oozie

 view release on metacpan or  search on metacpan

eg/workflows/cpan-sample-workflow/workflow.xml  view on Meta::CPAN

            <!-- flagDirForToday is populated in coordinator.xml -->
            <mkdir  path='${flagDirForToday}'    />
            <touchz path='${flagDirForToday}/OK' />
        </fs>
        <ok    to="join_example" />
        <error to="kill"         />
    </action>

    <!-- Sqoop action -->

    <action name="sqoop_example" retry-max="3" retry-interval="10">
        <!--
            Runs get-config.pl through the shell action and captures its
            output, so that later actions can look the values up with
            wf:actionData('sqoop_example').
        -->
        <shell xmlns="uri:oozie:shell-action:0.3">
            <exec>get-config.pl</exec>
            <file>get-config.pl</file>
            <capture-output />
        </shell>
        <ok to="sqoop_example_sqoop" />
        <error to="join_example" />
    </action>

    <action name           = "sqoop_example_sqoop"
            retry-max      = "3"
            retry-interval = "10"
        >
        <!--
            Sqoop import. The connection settings are read with
            wf:actionData() from the output captured by the
            "sqoop_example" action.
        -->
        <sqoop xmlns="uri:oozie:sqoop-action:0.4">
            <!--
                Accepts delete and mkdir; for hive imports, remove the
                leftovers of a potentially failed previous import
            -->
            <prepare>
                <delete path="hdfs:///user/hive/warehouse/my_table_name" />
            </prepare>

            <configuration>
                <!-- Put the import in the proper pool for throttling -->
                <property>
                    <name>mapred.fairscheduler.pool</name>
                    <value>my-big-fat-pool</value>
                </property>
                <!-- Other properties, like job name, pool name, etc. -->
            </configuration>

            <!--
                The arguments below need some variables; put them in the
                global section, for instance.
                For DB passwords, do not pass the password as a bare
                string: use an HDFS secret instead (see the password file
                argument below).
                The secret may not be available for new databases; if
                this is the case please contact Team BigData.
            -->

            <arg>import</arg>

            <!--
                Notice that the configuration is accessed by the
                action name: "sqoop_example".
                Every ${...} expression needs its own closing brace;
                the host and the schema are two separate expressions.
            -->
            <arg>--connect</arg>
            <arg>jdbc:mysql://${wf:actionData('sqoop_example')['db_host']}/${wf:actionData('sqoop_example')['db_schema']}?connectTimeout=300000&amp;socketTimeout=7200000</arg>

            <arg>--username</arg>
            <arg>${wf:actionData('sqoop_example')['db_user']}</arg>

            <arg>--password-file</arg>
            <arg>${wf:actionData('sqoop_example')['db_password_file']}</arg>

            <arg>--num-mappers</arg>
            <arg>64</arg>

            <!-- other sqoop options can be added at this point -->
        </sqoop>
        <ok    to="join_example" />
        <error to="kill"         />
    </action>

    <!-- Shell (any binary) action -->
    <action name="shell_example" retry-max="3" retry-interval="10">
        <shell xmlns="uri:oozie:shell-action:0.3">
            <!-- Settings in this configuration element apply to this action only -->
            <configuration>
                <property>
                    <name>mapreduce.job.name</name>
                    <value>example-thingie</value>
                </property>
            </configuration>

            <!-- The program to run, followed by its command line arguments -->
            <exec>myprogram.pl</exec>
            <argument>--dryrun</argument>
            <argument>--whatever=foo</argument>

            <file>myprogram.pl</file>

            <!-- Keep the program's stdout, which is expected to consist of foo=bar pairs -->
            <capture-output />
        </shell>
        <ok to="join_example" />
        <error to="kill" />
    </action>

    <!-- Fork no more -->
    <join name="join_example" to="mail_example" />

    <!-- Send an email -->
    <action name           = "mail_example"
            retry-max      = "3"
            retry-interval = "10"
        >
        <email xmlns="uri:oozie:email-action:0.1">
            <!--
                Remember the emailTo variable in the global section?

                Also note that ${clusterName} is populated by default when a
                job is deployed using oozie-deploy



( run in 0.771 second using v1.01-cache-2.11-cpan-97f6503c9c8 )