Skip to content

Commit

Permalink
Modify parameters
Browse files Browse the repository at this point in the history
  • Loading branch information
cryeo committed Feb 13, 2020
1 parent 1b5e6bd commit 3a93f54
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 13 deletions.
11 changes: 6 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ Spark SQL magic command for Jupyter notebooks.

## Prerequisites
- Python >= 3.6
- PySpark >= 2.3.0
- PySpark >= 2.3.0
- IPython >= 7.4.0

## Install
Expand All @@ -17,22 +17,22 @@ pip install sparksql-magic
## Usage

### Load
```ba
```
%load_ext sparksql_magic
```

### Config
```
%config SparkSql.max_num_rows=<INT>
%config SparkSql.limit=<INT>
```

|Option|Default|Description|
|---|---|---|
|`SparkSql.max_num_rows`|20|The maximum number of rows to display|
|`SparkSql.limit`|20|The maximum number of rows to display|

### Parameter
```
%%sparksql [-c|--cache] [-e|--eager] [-v|--view VIEW] [variable]
%%sparksql [-c|--cache] [-e|--eager] [-v|--view VIEW] [-l|--limit LIMIT] [variable]
<QUERY>
```

Expand All @@ -41,6 +41,7 @@ pip install sparksql-magic
|`-c` `--cache`|Cache dataframe|
|`-e` `--eager`|Cache dataframe with eager load|
|`-v VIEW` `--view VIEW`|Create or replace temporary view|
|`-l LIMIT` `--limit LIMIT`|The maximum number of rows to display (Default: `SparkSql.limit`)|
|`variable`|Capture dataframe in a local variable|


Expand Down
2 changes: 1 addition & 1 deletion sparksql_magic/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__version__ = '0.0.2'
__version__ = '0.0.3'

from .sparksql import SparkSql

Expand Down
16 changes: 9 additions & 7 deletions sparksql_magic/sparksql.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

@magics_class
class SparkSql(Magics):
max_num_rows = Int(20, config=True, help='The maximum number of rows to display')
limit = Int(20, config=True, help='The maximum number of rows to display')

@needs_local_scope
@cell_magic
Expand All @@ -21,6 +21,7 @@ class SparkSql(Magics):
@argument('-c', '--cache', action='store_true', help='Cache dataframe')
@argument('-e', '--eager', action='store_true', help='Cache dataframe with eager load')
@argument('-v', '--view', type=str, help='Create or replace temporary view')
@argument('-l', '--limit', type=int, help='The maximum number of rows to display')
def sparksql(self, line='', cell='', local_ns=None):
if local_ns is None:
local_ns = {}
Expand Down Expand Up @@ -49,14 +50,15 @@ def sparksql(self, line='', cell='', local_ns=None):
print('capture dataframe to local variable `%s`' % args.variable)
self.shell.user_ns.update({args.variable: df})

header, contents = get_results(df, self.max_num_rows)
if len(contents) > self.max_num_rows:
print('only showing top %d row(s)' % self.max_num_rows)
limit = args.limit or self.limit
header, contents = get_results(df, limit)
if len(contents) > limit:
print('only showing top %d row(s)' % limit)

html = make_tag('tr',
''.join(map(lambda x: make_tag('td', escape(x), style='font-weight: bold'), header)),
style='border-bottom: 1px solid')
for index, row in enumerate(contents[:self.max_num_rows]):
for index, row in enumerate(contents[:limit]):
html += make_tag('tr', ''.join(map(lambda x: make_tag('td', escape(x)), row)))

return HTML(make_tag('table', html))
Expand All @@ -72,14 +74,14 @@ def fetch_variable(match):
return re.sub(BIND_VARIABLE_PATTERN, fetch_variable, query)


def get_results(df, limit):
    """Fetch the column names and up to ``limit + 1`` rows of ``df`` as text.

    One row more than ``limit`` is requested on purpose: the caller compares
    ``len(contents)`` against ``limit`` to detect that the result was
    truncated, and slices ``contents[:limit]`` before rendering.

    Args:
        df: Object exposing ``columns`` and ``take(n)``; the caller passes a
            Spark DataFrame.
        limit: Maximum number of rows the caller intends to display.

    Returns:
        A ``(header, contents)`` tuple. ``header`` is ``df.columns``
        unchanged; ``contents`` is a list of rows, each row a list of
        strings.
    """
    def convert_value(value):
        # Show missing values as 'null' rather than Python's 'None'.
        return 'null' if value is None else str(value)

    header = df.columns
    contents = [
        [convert_value(value) for value in row]
        for row in df.take(limit + 1)
    ]

    return header, contents

Expand Down

0 comments on commit 3a93f54

Please sign in to comment.