#!/usr/bin/env python # coding: utf-8 # In[ ]: get_ipython().run_line_magic('pip', 'install --upgrade sqlite_utils') get_ipython().run_line_magic('pip', 'install --upgrade csvs-to-sqlite') # # Quick Sketch - `sqlite_utils` magic # # Wondering: what might `sqlite_utils` magic look like, and why should we bother? # # One way for it to look would be to start to replicate the CLI. # # As to why bother? Maybe folk are using a notebook but they aren't a Python programmer, or maybe they just want some shortcut, bespoke magic to do common operations in a CLI rather than a Python function way. # # Along the way, we might as well make use of other tools in @simonw's `-to-sqlite` universe. # ## Base magic — WIP # # Let's have a quick hack at some magic.. # In[159]: from IPython.core.magic import magics_class, line_magic, cell_magic, line_cell_magic, Magics from IPython.core import magic_arguments from sqlite_utils import Database from io import StringIO import pandas as pd import json import requests @magics_class class SQLite_Utils(Magics): def clean_string(self, string): """Clean string argument.""" if isinstance(string, str): string = string.strip("\"\'") return f'{string}' return string def download_file(self, url, filename=None, chunk_size=8192): """Download file.""" # We stream in case the file is a large one... # Should we download to a temporary file? # Or is persistence good for audit / if anything goes wrong? fn = url.split('/')[-1] if not filename else filename with requests.get(url, stream=True) as r: #r.raise_for_status() # Error reporting with open(fn, 'wb') as f: for chunk in r.iter_content(chunk_size=chunk_size): f.write(chunk) return fn # We'll simplify things for now by saying you MUST call this first @line_magic @magic_arguments.magic_arguments() @magic_arguments.argument("--dbvar", "-d", default='_sqlite_utils_db', help="DB var.") @magic_arguments.argument("--memory", "-m", action='store_true', help="DB var.") @magic_arguments.argument("--recreate", "-r", action='store_true', help="DB var.") @magic_arguments.argument( "--filename", "-f", default="sqlite_utils_magic.db", help="SQLite database pathname.") def sql_utils_create(self, line): "Create a database." args = magic_arguments.parse_argstring(self.sql_utils_create, line) # TO DO - the --recreate flag seems to create a read only database? if args.memory: # This is a bit of trickery - keep the db reference in a variable # in the parent Python environment self.shell.user_ns[args.dbvar] = Database(memory=True, recreate=args.recreate) else: self.shell.user_ns[args.dbvar] = Database(args.filename, recreate=args.recreate) self.shell.user_ns['_sql_utils_magic_db_path'] =args.filename @line_magic @magic_arguments.magic_arguments() @magic_arguments.argument("--dbvar", "-d", default='_sqlite_utils_db', help="DB var.") @magic_arguments.argument("--table", "-t", default=None, help="Table name.") @magic_arguments.argument("--columns", "-c", action='store_true', help="Show columns.") def sql_utils_tables(self, line): """Show tables.""" args = magic_arguments.parse_argstring(self.sql_utils_tables, line) db = self.shell.user_ns[args.dbvar] if args.table and args.columns: return db[args.table].columns return db.table_names() @line_magic @magic_arguments.magic_arguments() @magic_arguments.argument("--dbvar", "-d", default='_sqlite_utils_db', help="DB var.") def sql_utils_views(self, line): """Show views.""" args = magic_arguments.parse_argstring(self.sql_utils_views, line) db = self.shell.user_ns[args.dbvar] return db.view_names() # Remember for now we have simplified by requirig the db has been set up @line_cell_magic @magic_arguments.magic_arguments() @magic_arguments.argument("--csv", "-c", default=None, help="CSV path.") @magic_arguments.argument("--filename", "-f", default=None, help="Filename.") def sql_utils_import_csv(self, line, cell=None): """Import CSV file.""" args = magic_arguments.parse_argstring(self.sql_utils_import_csv, line) if not args.csv or '_sql_utils_magic_db_path' not in self.shell.user_ns: return if args.csv.startswith('http'): # It's a URL... so download it... fn = self.download_file(args.csv, args.filename) # Given we're in magic land! db_path = self.shell.user_ns['_sql_utils_magic_db_path'] get_ipython().system('csvs-to-sqlite {fn} {db_path}') @cell_magic @magic_arguments.magic_arguments() @magic_arguments.argument("--dbvar", "-d", default='_sqlite_utils_db', help="DB var.") @magic_arguments.argument("--table", "-t", default=None, help="Table name.") def sql_utils_add_rows(self, line, cell): """Add rows from JSON.""" args = magic_arguments.parse_argstring(self.sql_utils_add_rows, line) if not args.table: return db = self.shell.user_ns[args.dbvar] df = pd.DataFrame(StringIO(cell)) df = pd.json_normalize(df[0].str.strip().apply(json.loads)) db[args.table].insert_all(df.to_dict(orient='records')) @line_cell_magic @magic_arguments.magic_arguments() @magic_arguments.argument("--dbvar", "-d", default='_sqlite_utils_db', help="DB var.") @magic_arguments.argument("--create", "-k", action='store_true', help="Create.") @magic_arguments.argument("--table", "-t", default=None, help="Table name.") @magic_arguments.argument("--tabledef", "-T", default=None, help="Table definition.") @magic_arguments.argument("--pk", "-p", default=None, help="Primary key.") @magic_arguments.argument("--fk", "-f", default=None, help="Foreign keys.") @magic_arguments.argument("--columnorder", "-c", default=None, help="Column order.") @magic_arguments.argument("--notnull", "-n", default=None, help="Not null.") def sql_utils_table(self, line, cell=None): args = magic_arguments.parse_argstring(self.sql_utils_table, line) db = self.shell.user_ns[args.dbvar] tabledef = self.clean_string(args.tabledef) pk = self.clean_string(args.pk) fk = self.clean_string(args.fk) columnorder = self.clean_string(args.columnorder) if columnorder: columnorder = tuple([f"{c.strip()}" for c in columnorder.split(',')]) notnull = self.clean_string(args.notnull) if notnull: notnull = set([f"{n.strip()}" for n in notnull.split(',')]) if cell is None and args.table: if args.create: db[args.table].create(tabledef, pk=pk, foreign_keys=fk, column_order=columnorder, not_null=notnull ) else: db.table(args.table, pk=pk, foreign_keys=fk, column_order=columnorder, not_null=notnull ) else: db[args.table].create(eval(cell), pk=pk, foreign_keys=fk, column_order=columnorder, not_null=notnull ) @line_cell_magic @magic_arguments.magic_arguments() @magic_arguments.argument("--dbvar", "-d", default='_sqlite_utils_db', help="DB var.") @magic_arguments.argument("--table", "-t", default=None, help="Table name.") @magic_arguments.argument("--select", "-s", default=None, help="Query.") @magic_arguments.argument("--where", "-w", default=None, help="Where.") @magic_arguments.argument("--orderby", "-o", default=None, help="Order by.") @magic_arguments.argument("--generator", "-G", default=None, help="Return generator.") @magic_arguments.argument("--outvar", "-O", default=None, help="Output variable.") @magic_arguments.argument("--query", "-q", default=None, help="Open query.") @magic_arguments.argument("--get", "-g", default=None, help="Get record by primary key.") def sql_utils_query(self, line, cell=None): args = magic_arguments.parse_argstring(self.sql_utils_query, line) db = self.shell.user_ns[args.dbvar] if cell is None: # Allow a quoted string to be passed via -s if args.table: if args.get: # Does this need heuristic typecasting? # Or can we pass vars that retain type? return db[args.table].get(args.get) else: select = self.clean_string(args.select) where = self.clean_string(args.where) orderby = self.clean_string(args.orderby) resp = db[args.table].rows_where(where=where, select=select, order_by=orderby) if args.generator: return resp df = pd.DataFrame(resp) if args.outvar: self.shell.user_ns[args.outvar] = df return pd.DataFrame(df) elif args.query: # TO DO - DRY - we should be able to abstract most # of this into a separate utility function query = self.clean_string(args.query) df = pd.DataFrame(db.execute(query).fetchall()) if args.outvar: self.shell.user_ns[args.outvar] = df return pd.DataFrame(df) else: # For now assume this is just a query df = pd.DataFrame(db.execute(cell).fetchall()) if args.outvar: self.shell.user_ns[args.outvar] = df return pd.DataFrame(df) ip = get_ipython() ip.register_magics(SQLite_Utils) # Start off by creating a db - at the moment this is a MUST first step. # # Known issues: # # - `recreate` seems to result in a read-only db? # In[129]: #%load_ext SQLite_Utils get_ipython().run_line_magic('sql_utils_create', '--filename mydemo1.db') # We actually have a reference to the database: # In[130]: _sqlite_utils_db # We can add some rows to a table: # In[131]: get_ipython().run_cell_magic('sql_utils_add_rows', '-t test1', '{"id": 1, "name": "Cleo", "twitter": "cleopaws", "age": 3,"is_good_dog": true}\n{"id": 2, "name": "Marnie", "twitter": "MarnieTheDog", "age": 16,"is_good_dog": true}\n{"id": 3, "name": "Cleo2", "twitter": "cleopaws2", "age": 3,"is_good_dog": true}\n{"id": 4, "name": "Marnie2", "twitter": "MarnieTheDog2", "age": 16,"is_good_dog": true}\n') # We can list the tables: # In[132]: get_ipython().run_line_magic('sql_utils_tables', '') # We can query tables: # In[135]: get_ipython().run_line_magic('sql_utils_query', '-t test1 -s name,age') # We can have more elaborate queries: # In[136]: get_ipython().run_line_magic('sql_utils_query', "-t test1 -s 'name, age'") # We can import data from a CSV file downloaded from a URL: # In[94]: get_ipython().run_line_magic('sql_utils_import_csv', '--csv https://www.theyworkforyou.com/mps/?f=csv --filename mps.csv') # Recheck the tables: # In[137]: get_ipython().run_line_magic('sql_utils_tables', '') # In[138]: get_ipython().run_line_magic('sql_utils_query', "-t mps -s '*'") # Run an arbitrary query: # In[139]: df = get_ipython().run_line_magic('sql_utils_query', '-q "SELECT * FROM mps LIMIT 3;"') df # If we run the block magic we assume it's an open query: # In[163]: get_ipython().run_cell_magic('sql_utils_query', '--outvar mydf', 'SELECT *\n-- with a comment\nFROM mps -- and another comment\nLIMIT 10\n') # The `--outvar` switch lets us assign the result to a variable: # In[119]: mydf # We can preconfigure table attributes for when a table is created by an insert: # In[152]: get_ipython().run_line_magic('sql_utils_table', '--table "authors" --pk "id" --notnull "name,score" -c "id,name,score,url"') # We can explictly create a table: # In[160]: get_ipython().run_cell_magic('sql_utils_table', '--table cats --pk "id"', '{\n "id": int,\n "name": str,\n "weight": float,\n}\n') # We can view table columns: # In[161]: get_ipython().run_line_magic('sql_utils_tables', '--table cats -c')