Skip to content

Commit

Permalink
Merge pull request #20 from civitaspo/v0.1.9
Browse files Browse the repository at this point in the history
V0.1.9
  • Loading branch information
civitaspo committed Feb 8, 2016
2 parents b770415 + f14c622 commit 3ef0050
Show file tree
Hide file tree
Showing 17 changed files with 682 additions and 40 deletions.
5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@
/classpath/
build/
.idea
/.settings/
/.metadata/
.classpath
.project
*.iml
.ruby-version

9 changes: 9 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
language: java
jdk:
- openjdk7
- oraclejdk7
- oraclejdk8
script:
- ./gradlew test
after_success:
- ./gradlew jacocoTestReport coveralls
9 changes: 6 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
# Hdfs file input plugin for Embulk
[![Build Status](https://travis-ci.org/civitaspo/embulk-input-hdfs.svg)](https://travis-ci.org/civitaspo/embulk-input-hdfs)
[![Coverage Status](https://coveralls.io/repos/civitaspo/embulk-input-hdfs/badge.svg?branch=master&service=github)](https://coveralls.io/github/civitaspo/embulk-input-hdfs?branch=master)

Read files on Hdfs.

Expand All @@ -16,15 +18,16 @@ Read files on Hdfs.
- **rewind_seconds** When you use Date format in input_path property, the format is executed by using the time which is Now minus this property.
- **partition** when this is true, partition input files and increase task count. (default: `true`)
- **num_partitions** number of partitions. (default: `Runtime.getRuntime().availableProcessors()`)
- **skip_header_lines** Skip this number of lines first. Set 1 if the file has header line. (default: `0`)

## Example

```yaml
in:
type: hdfs
config_files:
- /opt/analytics/etc/hadoop/conf/core-site.xml
- /opt/analytics/etc/hadoop/conf/hdfs-site.xml
- /etc/hadoop/conf/core-site.xml
- /etc/hadoop/conf/hdfs-site.xml
config:
fs.defaultFS: 'hdfs://hadoop-nn1:8020'
dfs.replication: 1
Expand Down Expand Up @@ -106,4 +109,4 @@ $ ./gradlew gem
```
$ ./gradlew classpath
$ bundle exec embulk run -I lib example.yml
```
```
37 changes: 32 additions & 5 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@ plugins {
id "com.jfrog.bintray" version "1.1"
id "com.github.jruby-gradle.base" version "0.1.5"
id "java"
id "checkstyle"
id "com.github.kt3k.coveralls" version "2.4.0"
id "jacoco"
}
import com.github.jrubygradle.JRubyExec
repositories {
Expand All @@ -18,12 +21,13 @@ sourceCompatibility = 1.7
targetCompatibility = 1.7

dependencies {
compile "org.embulk:embulk-core:0.7.0"
provided "org.embulk:embulk-core:0.7.0"
compile "org.embulk:embulk-core:0.8.+"
provided "org.embulk:embulk-core:0.8.+"
// compile "YOUR_JAR_DEPENDENCY_GROUP:YOUR_JAR_DEPENDENCY_MODULE:YOUR_JAR_DEPENDENCY_VERSION"
compile 'org.apache.hadoop:hadoop-client:2.6.0'
compile 'com.google.guava:guava:15.0'
compile 'org.apache.hadoop:hadoop-client:2.6.+'
testCompile "junit:junit:4.+"
testCompile "org.embulk:embulk-core:0.8.+:tests"
testCompile "org.embulk:embulk-standards:0.8.+"
}

task classpath(type: Copy, dependsOn: ["jar"]) {
Expand All @@ -33,6 +37,29 @@ task classpath(type: Copy, dependsOn: ["jar"]) {
}
clean { delete "classpath" }

jacocoTestReport {
reports {
xml.enabled = true // coveralls plugin depends on xml format report
html.enabled = true
}
}
checkstyle {
configFile = file("${project.rootDir}/config/checkstyle/checkstyle.xml")
toolVersion = '6.14.1'
}
checkstyleMain {
configFile = file("${project.rootDir}/config/checkstyle/default.xml")
ignoreFailures = true
}
checkstyleTest {
configFile = file("${project.rootDir}/config/checkstyle/default.xml")
ignoreFailures = true
}
task checkstyle(type: Checkstyle) {
classpath = sourceSets.main.output + sourceSets.test.output
source = sourceSets.main.allJava + sourceSets.test.allJava
}

task gem(type: JRubyExec, dependsOn: ["gemspec", "classpath"]) {
jrubyArgs "-rrubygems/gem_runner", "-eGem::GemRunner.new.run(ARGV)", "build"
script "${project.name}.gemspec"
Expand All @@ -57,7 +84,7 @@ task gemspec {
Gem::Specification.new do |spec|
spec.name = "${project.name}"
spec.version = "${project.version}"
spec.authors = ["takahiro.nakayama"]
spec.authors = ["Civitaspo"]
spec.summary = %[Hdfs file input plugin for Embulk]
spec.description = %[Reads files stored on Hdfs.]
spec.email = ["[email protected]"]
Expand Down
128 changes: 128 additions & 0 deletions config/checkstyle/checkstyle.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE module PUBLIC
"-//Puppy Crawl//DTD Check Configuration 1.3//EN"
"http://www.puppycrawl.com/dtds/configuration_1_3.dtd">
<module name="Checker">
<!-- https://github.com/facebook/presto/blob/master/src/checkstyle/checks.xml -->
<module name="FileTabCharacter"/>
<module name="NewlineAtEndOfFile">
<property name="lineSeparator" value="lf"/>
</module>
<module name="RegexpMultiline">
<property name="format" value="\r"/>
<property name="message" value="Line contains carriage return"/>
</module>
<module name="RegexpMultiline">
<property name="format" value=" \n"/>
<property name="message" value="Line has trailing whitespace"/>
</module>
<module name="RegexpMultiline">
<property name="format" value="\{\n\n"/>
<property name="message" value="Blank line after opening brace"/>
</module>
<module name="RegexpMultiline">
<property name="format" value="\n\n\s*\}"/>
<property name="message" value="Blank line before closing brace"/>
</module>
<module name="RegexpMultiline">
<property name="format" value="\n\n\n"/>
<property name="message" value="Multiple consecutive blank lines"/>
</module>
<module name="RegexpMultiline">
<property name="format" value="\n\n\Z"/>
<property name="message" value="Blank line before end of file"/>
</module>
<module name="RegexpMultiline">
<property name="format" value="Preconditions\.checkNotNull"/>
<property name="message" value="Use of checkNotNull"/>
</module>

<module name="TreeWalker">
<module name="EmptyBlock">
<property name="option" value="text"/>
<property name="tokens" value="
LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_IF,
LITERAL_FOR, LITERAL_TRY, LITERAL_WHILE, INSTANCE_INIT, STATIC_INIT"/>
</module>
<module name="EmptyStatement"/>
<module name="EmptyForInitializerPad"/>
<module name="EmptyForIteratorPad">
<property name="option" value="space"/>
</module>
<module name="MethodParamPad">
<property name="allowLineBreaks" value="true"/>
<property name="option" value="nospace"/>
</module>
<module name="ParenPad"/>
<module name="TypecastParenPad"/>
<module name="NeedBraces"/>
<module name="LeftCurly">
<property name="option" value="nl"/>
<property name="tokens" value="CLASS_DEF, CTOR_DEF, INTERFACE_DEF, METHOD_DEF"/>
</module>
<module name="LeftCurly">
<property name="option" value="eol"/>
<property name="tokens" value="
LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_FOR,
LITERAL_IF, LITERAL_SWITCH, LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE"/>
</module>
<module name="RightCurly">
<property name="option" value="alone"/>
</module>
<module name="GenericWhitespace"/>
<module name="WhitespaceAfter"/>
<module name="NoWhitespaceBefore"/>

<module name="UpperEll"/>
<module name="DefaultComesLast"/>
<module name="ArrayTypeStyle"/>
<module name="MultipleVariableDeclarations"/>
<module name="ModifierOrder"/>
<module name="OneStatementPerLine"/>
<module name="StringLiteralEquality"/>
<module name="MutableException"/>
<module name="EqualsHashCode"/>
<module name="InnerAssignment"/>
<module name="InterfaceIsType"/>
<module name="HideUtilityClassConstructor"/>

<module name="MemberName"/>
<module name="LocalVariableName"/>
<module name="LocalFinalVariableName"/>
<module name="TypeName"/>
<module name="PackageName"/>
<module name="ParameterName"/>
<module name="StaticVariableName"/>
<module name="ClassTypeParameterName">
<property name="format" value="^[A-Z][0-9]?$"/>
</module>
<module name="MethodTypeParameterName">
<property name="format" value="^[A-Z][0-9]?$"/>
</module>

<module name="AvoidStarImport"/>
<module name="RedundantImport"/>
<module name="UnusedImports"/>
<module name="ImportOrder">
<property name="groups" value="*,javax,java"/>
<property name="separated" value="true"/>
<property name="option" value="bottom"/>
<property name="sortStaticImportsAlphabetically" value="true"/>
</module>

<module name="WhitespaceAround">
<property name="allowEmptyConstructors" value="true"/>
<property name="allowEmptyMethods" value="true"/>
<property name="ignoreEnhancedForColon" value="false"/>
<property name="tokens" value="
ASSIGN, BAND, BAND_ASSIGN, BOR, BOR_ASSIGN, BSR, BSR_ASSIGN,
BXOR, BXOR_ASSIGN, COLON, DIV, DIV_ASSIGN, EQUAL, GE, GT, LAND, LE,
LITERAL_ASSERT, LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE,
LITERAL_FINALLY, LITERAL_FOR, LITERAL_IF, LITERAL_RETURN,
LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE,
LOR, LT, MINUS, MINUS_ASSIGN, MOD, MOD_ASSIGN, NOT_EQUAL,
PLUS, PLUS_ASSIGN, QUESTION, SL, SLIST, SL_ASSIGN, SR, SR_ASSIGN,
STAR, STAR_ASSIGN, TYPE_EXTENSION_AND"/>
</module>
</module>
</module>
108 changes: 108 additions & 0 deletions config/checkstyle/default.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE module PUBLIC
"-//Puppy Crawl//DTD Check Configuration 1.3//EN"
"http://www.puppycrawl.com/dtds/configuration_1_3.dtd">
<!--
This is a subset of ./checkstyle.xml which allows some loose styles
-->
<module name="Checker">
<module name="FileTabCharacter"/>
<module name="NewlineAtEndOfFile">
<property name="lineSeparator" value="lf"/>
</module>
<module name="RegexpMultiline">
<property name="format" value="\r"/>
<property name="message" value="Line contains carriage return"/>
</module>
<module name="RegexpMultiline">
<property name="format" value=" \n"/>
<property name="message" value="Line has trailing whitespace"/>
</module>
<module name="RegexpMultiline">
<property name="format" value="\n\n\n"/>
<property name="message" value="Multiple consecutive blank lines"/>
</module>
<module name="RegexpMultiline">
<property name="format" value="\n\n\Z"/>
<property name="message" value="Blank line before end of file"/>
</module>

<module name="TreeWalker">
<module name="EmptyBlock">
<property name="option" value="text"/>
<property name="tokens" value="
LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_IF,
LITERAL_FOR, LITERAL_TRY, LITERAL_WHILE, INSTANCE_INIT, STATIC_INIT"/>
</module>
<module name="EmptyStatement"/>
<module name="EmptyForInitializerPad"/>
<module name="EmptyForIteratorPad">
<property name="option" value="space"/>
</module>
<module name="MethodParamPad">
<property name="allowLineBreaks" value="true"/>
<property name="option" value="nospace"/>
</module>
<module name="ParenPad"/>
<module name="TypecastParenPad"/>
<module name="NeedBraces"/>
<module name="LeftCurly">
<property name="option" value="nl"/>
<property name="tokens" value="CLASS_DEF, CTOR_DEF, INTERFACE_DEF, METHOD_DEF"/>
</module>
<module name="LeftCurly">
<property name="option" value="eol"/>
<property name="tokens" value="
LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_FOR,
LITERAL_IF, LITERAL_SWITCH, LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE"/>
</module>
<module name="RightCurly">
<property name="option" value="alone"/>
</module>
<module name="GenericWhitespace"/>
<module name="WhitespaceAfter"/>
<module name="NoWhitespaceBefore"/>

<module name="UpperEll"/>
<module name="DefaultComesLast"/>
<module name="ArrayTypeStyle"/>
<module name="MultipleVariableDeclarations"/>
<module name="ModifierOrder"/>
<module name="OneStatementPerLine"/>
<module name="StringLiteralEquality"/>
<module name="MutableException"/>
<module name="EqualsHashCode"/>
<module name="InnerAssignment"/>
<module name="InterfaceIsType"/>
<module name="HideUtilityClassConstructor"/>

<module name="MemberName"/>
<module name="LocalVariableName"/>
<module name="LocalFinalVariableName"/>
<module name="TypeName"/>
<module name="PackageName"/>
<module name="ParameterName"/>
<module name="StaticVariableName"/>
<module name="ClassTypeParameterName">
<property name="format" value="^[A-Z][0-9]?$"/>
</module>
<module name="MethodTypeParameterName">
<property name="format" value="^[A-Z][0-9]?$"/>
</module>

<module name="WhitespaceAround">
<property name="allowEmptyConstructors" value="true"/>
<property name="allowEmptyMethods" value="true"/>
<property name="ignoreEnhancedForColon" value="false"/>
<property name="tokens" value="
ASSIGN, BAND, BAND_ASSIGN, BOR, BOR_ASSIGN, BSR, BSR_ASSIGN,
BXOR, BXOR_ASSIGN, COLON, DIV, DIV_ASSIGN, EQUAL, GE, GT, LAND, LE,
LITERAL_ASSERT, LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE,
LITERAL_FINALLY, LITERAL_FOR, LITERAL_IF, LITERAL_RETURN,
LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE,
LOR, LT, MINUS, MINUS_ASSIGN, MOD, MOD_ASSIGN, NOT_EQUAL,
PLUS, PLUS_ASSIGN, QUESTION, SL, SLIST, SL_ASSIGN, SR, SR_ASSIGN,
STAR, STAR_ASSIGN, TYPE_EXTENSION_AND"/>
</module>
</module>
</module>
35 changes: 35 additions & 0 deletions example/config.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
hdfs_example: &hdfs_example
config_files:
- /etc/hadoop/conf/core-site.xml
- /etc/hadoop/conf/hdfs-site.xml
config:
fs.defaultFS: 'hdfs://hadoop-nn1:8020'
fs.hdfs.impl: 'org.apache.hadoop.hdfs.DistributedFileSystem'
fs.file.impl: 'org.apache.hadoop.fs.LocalFileSystem'

local_fs_example: &local_fs_example
config:
fs.defaultFS: 'file:///'
fs.hdfs.impl: 'org.apache.hadoop.fs.LocalFileSystem'
fs.file.impl: 'org.apache.hadoop.fs.LocalFileSystem'

in:
type: hdfs
<<: *local_fs_example
path: example/data.csv
parser:
charset: UTF-8
newline: CRLF
type: csv
delimiter: ','
quote: '"'
header_line: true
columns:
- {name: id, type: long}
- {name: account, type: long}
- {name: time, type: timestamp, format: '%Y-%m-%d %H:%M:%S'}
- {name: purchase, type: timestamp, format: '%Y%m%d'}
- {name: comment, type: string}

out:
type: stdout
Loading

0 comments on commit 3ef0050

Please sign in to comment.