diff --git a/.gitignore b/.gitignore index 68bc17f..cf39498 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,7 @@ +.vscode +sql/solution.sql +data/fashion_magazines.csv + # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] diff --git a/README.md b/README.md index 60c4483..820eb87 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,85 @@ -# fashion-magazines +# Fashion Magazines Code Louisville Data Analysis Exercise + + +## Overview + +In this exercise we will write a SQL query against a database of magazine +subscriptions. This exercise is based on the Codecademy "Multiple Tables" +lesson. + +### Schema +![database schema](img/schema.png) + +### Table: customers +| column | type | constraint | +| ------ | ---- | ---------- | +| customer_id | INTEGER | PRIMARY KEY | +| customer_name | TEXT | NOT NULL | +| address | TEXT | NOT NULL | + + +### Table: subscriptions +| column | type | constraint | +| ------ | ---- | ---------- | +| subscription_id | INTEGER | PRIMARY KEY | +| description | TEXT | NOT NULL | +| price_per_month | INTEGER | NOT NULL | +| subscription_length | INTEGER | NOT NULL | + +### Table: orders +| column | type | constraint | +| ------ | ---- | ---------- | +| order_id | INTEGER | PRIMARY KEY | +| customer_id | INTEGER | FOREIGN KEY | +| subscription_id | INTEGER | FOREIGN KEY | +| purchase_date | TEXT | NOT NULL | +| order_status | TEXT | NOT NULL | + +### Requirements + +Write a SQL query that returns the customer name and total amount due for the +customers that have unpaid Fashion Magazine subscriptions. Note that the column +names in the resulting file need to match the column names in the example below. 
+ +Hints +- You will need to join the customers, subscriptions, and orders tables +- You will need to multiply the subscription price by the subscription length +to get the total amount due +- You will need to sum the amount due to account for customers that have more +than one unpaid Fashion Magazine subscription +### Example Output + +| Customer | Amount Due | +| -------- | ---------- | +| Bethann Schraub | 102 | +| Eryn Vilar | 102 | +| Janay Priolo | 57 | +| Lizabeth Letsche | 237 | + + +## Instructions + +1. Clone the repo to your machine. +1. Create a virtual environment and install the packages listed in the `requirements.txt` file. +1. Add your SQL query to the `sql/fashion_magazines.sql` file. +1. Run the `run_sql.py` script. +1. Add, Commit, and Push your `sql/fashion_magazines.sql` and `data/fashion_magazines.csv` files back to GitHub. + +### Virtual Environment Instructions + +1. After you have cloned the repo to your machine, navigate to the project folder in GitBash/Terminal. +1. Create a virtual environment in the project folder. `python3 -m venv venv` [^1] +1. Activate the virtual environment. `source venv/bin/activate` +1. Install the required packages. `pip install -r requirements.txt` +1. When you are done working on your repo, deactivate the virtual environment. `deactivate` + +[^1]: GitBash on Windows uses “python” instead of “python3” + +### Automated Testing + +This repo contains a small testing program that is automatically run by GitHub to validate your code. This testing program is contained in the tests.py file. You don't have to do anything with this file to complete the exercise, but you can follow these steps if you would like to run the tests on your machine. + +1. Open GitBash in Windows or the Terminal in Mac and navigate to the project folder. +1. Use the following command to run the tests: `pytest tests.py`. +1. Review the output from running the test. This will let you know whether your code produces the expected results. 
\ No newline at end of file diff --git a/create_db.py b/create_db.py new file mode 100644 index 0000000..a41a92e --- /dev/null +++ b/create_db.py @@ -0,0 +1,83 @@ +import sqlite3 +from sqlite3 import Error +from sqlite3 import OperationalError +import os + +def create_connection(path_to_db_file: str) -> sqlite3.Connection: + """ create a database connection to the SQLite database + specified by path_to_db_file + :param path_to_db_file: path to the database file + :return: Connection object or None + """ + conn = None + try: + conn = sqlite3.connect(path_to_db_file) + return conn + except Error as e: + print(e) + + return conn + + +def execute_sql(conn: sqlite3.Connection, sql_file: str) -> None: + """ execute each ';'-separated statement, skipping any that raise OperationalError + :param conn: Connection object + :param sql_file: one or more sql commands + :return: + """ + + sqlCommands = sql_file.split(';') + + for command in sqlCommands: + try: + conn.execute(command) + conn.commit() + except OperationalError as msg: + print("Command skipped: ", msg) + + return None + + +def get_sql(file_path: str) -> str: + """retrieve the SQL commands from a text file + @param: file_path - the path to the text file + @return: str - a string containing the contents of the file""" + fd = open(file_path, 'r') + sql = fd.read() + fd.close() + return sql + + +def validate_db(conn: sqlite3.Connection) -> None: + """validate that the database was set up correctly + @param conn: a connection to the database""" + try: + result = conn.execute("select * from customers;") + rows = result.fetchall() + if len(rows) != 10: + print('data did not load') + else: + print('setup complete') + except Error as e: + print(e) + + +def main() -> None: + database = "db/fashion_magazines.db" + create_db_script = "sql/create_db.sql" + + if os.path.exists(database): + os.remove(database) + + conn = create_connection(database) + + if conn is not None: + execute_sql(conn, get_sql(create_db_script)) + 
validate_db(conn) + conn.close() + + return None + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/db/fashion_magazines.db b/db/fashion_magazines.db new file mode 100644 index 0000000..8d171b4 Binary files /dev/null and b/db/fashion_magazines.db differ diff --git a/fashion-magazines.session.sql b/fashion-magazines.session.sql new file mode 100644 index 0000000..b4cc433 --- /dev/null +++ b/fashion-magazines.session.sql @@ -0,0 +1,8 @@ +SELECT customers.customer_name as 'Customer', + sum((subscriptions.price_per_month * subscriptions.subscription_length)) as 'Amount Due' +FROM orders + left join subscriptions on orders.subscription_id = subscriptions.subscription_id + left join customers on orders.customer_id = customers.customer_id +WHERE subscriptions.description = 'Fashion Magazine' + AND orders.order_status = 'unpaid' +GROUP BY customers.customer_name; \ No newline at end of file diff --git a/img/schema.png b/img/schema.png new file mode 100644 index 0000000..3b2cf4b Binary files /dev/null and b/img/schema.png differ diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..854f3b4 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,12 @@ +exceptiongroup==1.1.3 +iniconfig==2.0.0 +numpy==1.25.2 +packaging==23.1 +pandas==2.1.0 +pluggy==1.3.0 +pytest==7.4.1 +python-dateutil==2.8.2 +pytz==2023.3 +six==1.16.0 +tomli==2.0.1 +tzdata==2023.3 diff --git a/run_sql.py b/run_sql.py new file mode 100644 index 0000000..42916da --- /dev/null +++ b/run_sql.py @@ -0,0 +1,101 @@ +import sqlite3 +from sqlite3 import Error +import pandas as pd +import argparse +import os + +# Python script to execute a SQL statement and store the results in a CSV file. +# +# Usage: +# python3 run_sql.py [path_to_db] [path_to_sql] [path_to_csv] +# +# Where: +# +# path_to_db: the path to the sqlite3 database file. default is +# "db/fashion_magazines.db" +# +# path_to_sql: the path to the file containing the sql query. 
default is +# "sql/fashion_magazines.sql" +# +# path_to_csv: the path to the csv file that will be created with the results +# of the query. default is "data/fashion_magazines.csv" + + +def get_paths() -> tuple: + """Get the paths names from the arguments passed in + @return a tuple containing (path_to_db, path_to_sql, path_to_csv) + """ + parser = argparse.ArgumentParser() + parser.add_argument("db", nargs="?", + help="path to the sqlite3 database file", + default="db/fashion_magazines.db") + parser.add_argument("sql", nargs="?", + help="path to the file containing the sql query", + default="sql/fashion_magazines.sql") + parser.add_argument("csv", nargs="?", + help="path to the csv file that will be created", + default="data/fashion_magazines.csv") + args = parser.parse_args() + return args.db, args.sql, args.csv + + +def create_connection(path_to_db_file: str) -> sqlite3.Connection: + """ create a database connection to the SQLite database + specified by path_to_db_file + :param path_to_db_file: path to the database file + :return: Connection object or None + """ + conn = None + try: + conn = sqlite3.connect(path_to_db_file) + return conn + except Error as e: + print(e) + + return conn + + +def get_sql(file_path: str) -> str: + """retrieve the SQL commands from a text file + @param: file_path - the path to the text file + @return: str - a string containing the contents of the file""" + fd = open(file_path, 'r') + sql = fd.read() + fd.close() + + return sql + + +def main() -> None: + path_to_db, path_to_sql, path_to_csv = get_paths() + conn = create_connection(path_to_db) + sql = get_sql(path_to_sql) + + if sql.strip().lower() in ("-- add your sql here", ""): + print("Error: Add your sql to the sql/fashion_magazines.sql file before running.") + exit(1) + + if conn is not None: + movies = pd.read_sql(sql, conn) + + if len(movies) == 0: + print("Error: query did not return any results") + exit(1) + csv_dir = os.path.dirname(path_to_csv) + + if csv_dir and not os.path.exists(csv_dir): + os.makedirs(csv_dir) + + 
movies.to_csv(path_to_csv, index=False) + + else: + print("Error: Could not connect to database.") + exit(1) + + conn.close() + + return None + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/sql/create_db.sql b/sql/create_db.sql new file mode 100644 index 0000000..18dd803 --- /dev/null +++ b/sql/create_db.sql @@ -0,0 +1,73 @@ +DROP TABLE IF EXISTS subscriptions; +CREATE TABLE subscriptions ( + subscription_id INTEGER PRIMARY KEY, + description TEXT NOT NULL, + price_per_month INTEGER NOT NULL, + subscription_length INTEGER NOT NULL +); + +DROP TABLE IF EXISTS orders; +CREATE TABLE orders ( + order_id INTEGER PRIMARY KEY, + customer_id INTEGER NOT NULL, + subscription_id INTEGER NOT NULL, + purchase_date TEXT NOT NULL, + order_status TEXT NOT NULL, + FOREIGN KEY(customer_id) REFERENCES customers(customer_id), + FOREIGN KEY(subscription_id) REFERENCES subscriptions(subscription_id) +); + +DROP TABLE IF EXISTS customers; +CREATE TABLE customers ( + customer_id INTEGER PRIMARY KEY, + customer_name TEXT NOT NULL, + address TEXT NOT NULL +); + +INSERT INTO subscriptions (subscription_id, description, price_per_month, subscription_length) +VALUES +(1, 'Politics Magazine', 10, '12'), +(2, 'Politics Magazine', 11, '6'), +(3, 'Politics Magazine', 12, '3'), +(4, 'Fashion Magazine', 15, '12'), +(5, 'Fashion Magazine', 17, '6'), +(6, 'Fashion Magazine', 19, '3'), +(7, 'Sports Magazine', 11, '12'), +(8, 'Sports Magazine', 12, '6'), +(9, 'Sports Magazine', 13, '3'); + +INSERT INTO customers (customer_id, customer_name, address) +VALUES +(1, 'Allie Rahaim', '123 Broadway'), +(2, 'Jacquline Diddle', '456 Park Ave.'), +(3, 'Lizabeth Letsche', '789 Main St.'), +(4, 'Jessia Butman', '1 Columbus Ave.'), +(5, 'Inocencia Goyco', '12 Amsterdam Ave.'), +(6, 'Bethann Schraub', '29 Monticello'), +(7, 'Janay Priolo', '81 Harrisburg'), +(8, 'Ophelia Sturdnant', '31 Deerfield Ave.'), +(9, 'Eryn Vilar', '56 Morton St.'), +(10, 'Jina Farraj', '100 Bryan Ave.'); + 
+INSERT INTO orders (order_id, customer_id, subscription_id, purchase_date, order_status) +VALUES +(10, 1, 7, '2016-01-26 00:00:00', 'paid'), +(15, 1, 2, '2017-01-06 00:00:00', 'paid'), +(16, 1, 2, '2017-07-06 00:00:00', 'unpaid'), +(20, 1, 7, '2017-01-26 00:00:00', 'unpaid'), +(2, 2, 4, '2017-01-09 00:00:00', 'paid'), +(1, 3, 2, '2017-01-10 00:00:00', 'paid'), +(3, 3, 4, '2017-01-26 00:00:00', 'unpaid'), +(12, 3, 2, '2017-07-10 00:00:00', 'unpaid'), +(13, 3, 5, '2017-01-03 00:00:00', 'paid'), +(17, 3, 6, '2017-07-03 00:00:00', 'unpaid'), +(18, 3, 8, '2017-01-29 00:00:00', 'paid'), +(9, 4, 4, '2017-01-25 00:00:00', 'paid'), +(19, 4, 9, '2017-01-03 00:00:00', 'paid'), +(7, 5, 8, '2017-01-11 00:00:00', 'paid'), +(11, 5, 4, '2017-01-07 00:00:00', 'paid'), +(14, 6, 5, '2017-01-22 00:00:00', 'unpaid'), +(5, 7, 6, '2017-01-25 00:00:00', 'unpaid'), +(6, 8, 2, '2017-01-18 00:00:00', 'paid'), +(4, 9, 9, '2017-01-04 00:00:00', 'paid'), +(8, 9, 5, '2017-01-26 00:00:00', 'unpaid'); \ No newline at end of file diff --git a/sql/fashion_magazines.sql b/sql/fashion_magazines.sql new file mode 100644 index 0000000..4dd0555 --- /dev/null +++ b/sql/fashion_magazines.sql @@ -0,0 +1 @@ +-- Add your sql here \ No newline at end of file