From 3a93f5431ccbc63686fa10510a115e55ac38b54a Mon Sep 17 00:00:00 2001 From: Chaerim Yeo Date: Thu, 13 Feb 2020 13:31:59 +0900 Subject: [PATCH] Modify parameters --- README.md | 11 ++++++----- sparksql_magic/__init__.py | 2 +- sparksql_magic/sparksql.py | 16 +++++++++------- 3 files changed, 16 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index 8975d95..86a2e60 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ Spark SQL magic command for Jupyter notebooks. ## Prerequisites - Python >= 3.6 -- PySpark >= 2.3.0 +- PySpark >= 2.3.0 - IPython >= 7.4.0 ## Install @@ -17,22 +17,22 @@ pip install sparksql-magic ## Usage ### Load -```ba +``` %load_ext sparksql_magic ``` ### Config ``` -%config SparkSql.max_num_rows= +%config SparkSql.limit= ``` |Option|Default|Description| |---|---|---| -|`SparkSql.max_num_rows`|20|The maximum number of rows to display| +|`SparkSql.limit`|20|The maximum number of rows to display| ### Parameter ``` -%%sparksql [-c|--cache] [-e|--eager] [-v|--view VIEW] [variable] +%%sparksql [-c|--cache] [-e|--eager] [-v|--view VIEW] [-l|--limit LIMIT] [variable] ``` @@ -41,6 +41,7 @@ pip install sparksql-magic |`-c` `--cache`|Cache dataframe| |`-e` `--eager`|Cache dataframe with eager load| |`-v VIEW` `--view VIEW`|Create or replace temporary view| +|`-l LIMIT` `--limit LIMIT`|The maximum number of rows to display (Default: `SparkSql.limit`)| |`variable`|Capture dataframe in a local variable| diff --git a/sparksql_magic/__init__.py b/sparksql_magic/__init__.py index b780079..29a53f6 100644 --- a/sparksql_magic/__init__.py +++ b/sparksql_magic/__init__.py @@ -1,4 +1,4 @@ -__version__ = '0.0.2' +__version__ = '0.0.3' from .sparksql import SparkSql diff --git a/sparksql_magic/sparksql.py b/sparksql_magic/sparksql.py index c329afe..3e567be 100644 --- a/sparksql_magic/sparksql.py +++ b/sparksql_magic/sparksql.py @@ -12,7 +12,7 @@ @magics_class class SparkSql(Magics): - max_num_rows = Int(20, config=True, help='The maximum number of rows to display') + limit = Int(20, config=True, help='The maximum number of rows to display') @needs_local_scope @cell_magic @@ -21,6 +21,7 @@ class SparkSql(Magics): @argument('-c', '--cache', action='store_true', help='Cache dataframe') @argument('-e', '--eager', action='store_true', help='Cache dataframe with eager load') @argument('-v', '--view', type=str, help='Create or replace temporary view') + @argument('-l', '--limit', type=int, help='The maximum number of rows to display') def sparksql(self, line='', cell='', local_ns=None): if local_ns is None: local_ns = {} @@ -49,14 +50,15 @@ def sparksql(self, line='', cell='', local_ns=None): print('capture dataframe to local variable `%s`' % args.variable) self.shell.user_ns.update({args.variable: df}) - header, contents = get_results(df, self.max_num_rows) - if len(contents) > self.max_num_rows: - print('only showing top %d row(s)' % self.max_num_rows) + limit = args.limit or self.limit + header, contents = get_results(df, limit) + if len(contents) > limit: + print('only showing top %d row(s)' % limit) html = make_tag('tr', ''.join(map(lambda x: make_tag('td', escape(x), style='font-weight: bold'), header)), style='border-bottom: 1px solid') - for index, row in enumerate(contents[:self.max_num_rows]): + for index, row in enumerate(contents[:limit]): html += make_tag('tr', ''.join(map(lambda x: make_tag('td', escape(x)), row))) return HTML(make_tag('table', html)) @@ -72,14 +74,14 @@ def fetch_variable(match): return re.sub(BIND_VARIABLE_PATTERN, fetch_variable, query) -def get_results(df, max_num_rows): +def get_results(df, limit): def convert_value(value): if value is None: return 'null' return str(value) header = df.columns - contents = list(map(lambda row: list(map(convert_value, row)), df.take(max_num_rows + 1))) + contents = list(map(lambda row: list(map(convert_value, row)), df.take(limit + 1))) return header, contents