Relationalize All the Things

ddlgenerator

What is Data?

In [1]:
cat animals.csv
name,species,kg,notes
Alfred,wart hog,22,loves turnips
Gertrude,polar bear,312.7,deep thinker
Emily,salamander,0.3,

In [2]:
!ddlgenerator postgresql animals.csv
CREATE TABLE animals (
	name VARCHAR(8) NOT NULL, 
	species VARCHAR(10) NOT NULL, 
	kg DECIMAL(4, 1) NOT NULL, 
	notes VARCHAR(13) NOT NULL, 
	UNIQUE (name), 
	UNIQUE (species), 
	UNIQUE (kg), 
	UNIQUE (notes)
);


In [3]:
!ddlgenerator --inserts postgresql animals.csv
CREATE TABLE animals (
	name VARCHAR(8) NOT NULL, 
	species VARCHAR(10) NOT NULL, 
	kg DECIMAL(4, 1) NOT NULL, 
	notes VARCHAR(13) NOT NULL, 
	UNIQUE (name), 
	UNIQUE (species), 
	UNIQUE (kg), 
	UNIQUE (notes)
);

INSERT INTO animals (name, species, kg, notes) VALUES ('Alfred', 'wart hog', 22, 'loves turnips');
INSERT INTO animals (name, species, kg, notes) VALUES ('Gertrude', 'polar bear', 312.7, 'deep thinker');
INSERT INTO animals (name, species, kg, notes) VALUES ('Emily', 'salamander', 0.3, '');

In [4]:
!psql -c "SELECT table_name FROM information_schema.tables WHERE table_schema = 'public'"
 table_name 
------------
(0 rows)


In [5]:
!ddlgenerator --inserts postgresql animals.csv | psql
NOTICE:  CREATE TABLE / UNIQUE will create implicit index "animals_name_key" for table "animals"
NOTICE:  CREATE TABLE / UNIQUE will create implicit index "animals_species_key" for table "animals"
NOTICE:  CREATE TABLE / UNIQUE will create implicit index "animals_kg_key" for table "animals"
NOTICE:  CREATE TABLE / UNIQUE will create implicit index "animals_notes_key" for table "animals"
CREATE TABLE
INSERT 0 1
INSERT 0 1
INSERT 0 1

In [6]:
!psql -c "SELECT * FROM animals"
   name   |  species   |  kg   |     notes     
----------+------------+-------+---------------
 Alfred   | wart hog   |  22.0 | loves turnips
 Gertrude | polar bear | 312.7 | deep thinker
 Emily    | salamander |   0.3 | 
(3 rows)


xml

In [7]:
cat countries.xml
<?xml version="1.0"?>
<data>
    <country name="Liechtenstein">
        <rank>1</rank>
        <year>2008</year>
        <gdppc>141100</gdppc>
        <neighbor name="Austria" direction="E"/>
        <neighbor name="Switzerland" direction="W"/>
    </country>
    <country name="Singapore">
        <rank>4</rank>
        <year>2011</year>
        <gdppc>59900</gdppc>
        <neighbor name="Malaysia" direction="N"/>
    </country>
    <country name="Panama">
        <rank>68</rank>
        <year>2011</year>
        <gdppc>13600</gdppc>
        <neighbor name="Costa Rica" direction="W"/>
        <neighbor name="Colombia" direction="E"/>
    </country>
</data>

In [8]:
!ddlgenerator --inserts postgresql countries.xml | psql -q
NOTICE:  CREATE TABLE will create implicit sequence "countries_countries_id_seq" for serial column "countries.countries_id"
NOTICE:  CREATE TABLE / PRIMARY KEY will create implicit index "countries_pkey" for table "countries"
NOTICE:  CREATE TABLE / UNIQUE will create implicit index "countries_name_key" for table "countries"
NOTICE:  CREATE TABLE / UNIQUE will create implicit index "countries_rank_key" for table "countries"
NOTICE:  CREATE TABLE / UNIQUE will create implicit index "countries_gdppc_key" for table "countries"
NOTICE:  CREATE TABLE / UNIQUE will create implicit index "countries_neighbor_tag_key" for table "countries"
NOTICE:  CREATE TABLE / UNIQUE will create implicit index "countries_neighbor_name_key" for table "countries"
NOTICE:  CREATE TABLE / UNIQUE will create implicit index "countries_neighbor_direction_key" for table "countries"
NOTICE:  CREATE TABLE / UNIQUE will create implicit index "neighbor_name_key" for table "neighbor"

In [9]:
!psql -c "SELECT * FROM countries"
   tag   |     name      | rank | year | gdppc  | countries_id | neighbor_tag | neighbor_name | neighbor_direction 
---------+---------------+------+------+--------+--------------+--------------+---------------+--------------------
 country | Liechtenstein |    1 | 2008 | 141100 |            1 |              |               | 
 country | Singapore     |    4 | 2011 |  59900 |            2 | neighbor     | Malaysia      | f
 country | Panama        |   68 | 2011 |  13600 |            3 |              |               | 
(3 rows)


json

In [10]:
cat menu.json
[
  {
    "name": "soup",
    "cost": 4.99
  }, 
  { 
    "name": "sweet potatoes",
    "cost": 4.99
  },
  {
    "name": "nuts",
    "warning": "contains nuts", 
    "cost": 2.95 
  }
]


In [11]:
!ddlgenerator --inserts postgresql menu.json | psql -q
NOTICE:  CREATE TABLE / UNIQUE will create implicit index "menu_name_key" for table "menu"
NOTICE:  CREATE TABLE / UNIQUE will create implicit index "menu_warning_key" for table "menu"

In [12]:
!psql -c "SELECT * FROM menu"
      name      | cost |    warning    
----------------+------+---------------
 soup           | 4.99 | 
 sweet potatoes | 4.99 | 
 nuts           | 2.95 | contains nuts
(3 rows)


yaml

In [13]:
cat knights.yaml
-
  name: Lancelot
  dob: 9 jan 471
  kg: 82
  brave: y
- 
  name: Gawain
  kg: 69.2
  brave: y
- 
  name: Robin
  dob: 9 jan 471
  brave: n
- 
  name: Reepacheep
  kg: 0.0691
  brave: y

In [14]:
!ddlgenerator --inserts postgresql knights.yaml | psql -q
NOTICE:  CREATE TABLE / UNIQUE will create implicit index "knights_name_key" for table "knights"
NOTICE:  CREATE TABLE / UNIQUE will create implicit index "knights_kg_key" for table "knights"

In [15]:
!psql -c "SELECT * FROM knights"
    name    |         dob         |   kg    | brave 
------------+---------------------+---------+-------
 Lancelot   | 0471-01-09 00:00:00 | 82.0000 | t
 Gawain     |                     | 69.2000 | t
 Robin      | 0471-01-09 00:00:00 |         | f
 Reepacheep |                     |  0.0691 | t
(4 rows)


html

In [16]:
!ddlgenerator --inserts postgresql http://en.wikipedia.org/wiki/List_of_cities_in_Ohio | psql -q
NOTICE:  CREATE TABLE / UNIQUE will create implicit index "generated_table0_city_key" for table "generated_table0"

In [17]:
!psql -c "SELECT * FROM generated_table0 LIMIT 10" 
   city    | population |              county              
-----------+------------+----------------------------------
 Akron     | 199,110    | Summit County
 Alliance  | 22,322     | Stark County and Mahoning County
 Amherst   | 12,021     | Lorain County
 Ashland   | 20,362     | Ashland County
 Ashtabula | 19,124     | Ashtabula County
 Athens    | 23,832     | Athens County
 Aurora    | 15,548     | Portage County
 Avon      | 21,193     | Lorain County
 Avon Lake | 22,581     | Lorain County
 Barberton | 26,550     | Summit County
(10 rows)


warning

In [18]:
cat birds.yaml
-
  common name: Northern Cardinal
  scientific name: Carnidalis cardinalis
  length in cm: 21
  state: 
    - 
      name: Illinois
      abbrev: IL
    -
      name: Indiana
      abbrev: IN
    - 
      name: Kentucky
      abbrev: KY
    - 
      name: North Carolina
      abbrev: NC
    - 
      name: Ohio
      abbrev: OH
    - 
      name: Virginia
      abbrev: VA
    - 
      name: West Virginia
      abbrev: WV
- 
  common name: Great Northern Loon
  scientific name: Gavia immer
  state:
    - 
      name: Minnesota
      abbrev: MN

In [19]:
!ddlgenerator --inserts postgresql birds.yaml | psql -q
NOTICE:  CREATE TABLE will create implicit sequence "birds_birds_id_seq" for serial column "birds.birds_id"
NOTICE:  CREATE TABLE / PRIMARY KEY will create implicit index "birds_pkey" for table "birds"
NOTICE:  CREATE TABLE / UNIQUE will create implicit index "birds_common_name_key" for table "birds"
NOTICE:  CREATE TABLE / UNIQUE will create implicit index "birds_scientific_name_key" for table "birds"
NOTICE:  CREATE TABLE / UNIQUE will create implicit index "birds_length_in_cm_key" for table "birds"
NOTICE:  CREATE TABLE / UNIQUE will create implicit index "state_name_key" for table "state"
NOTICE:  CREATE TABLE / UNIQUE will create implicit index "state_abbrev_key" for table "state"

In [20]:
!psql -c "SELECT * FROM birds" 
     common_name     |    scientific_name    | length_in_cm | birds_id 
---------------------+-----------------------+--------------+----------
 Northern Cardinal   | Carnidalis cardinalis |           21 |        1
 Great Northern Loon | Gavia immer           |              |        2
(2 rows)


In [21]:
!psql -c "\d state" 
             Table "public.state"
  Column  |         Type          | Modifiers 
----------+-----------------------+-----------
 name     | character varying(14) | not null
 abbrev   | character varying(2)  | not null
 birds_id | integer               | not null
Indexes:
    "state_abbrev_key" UNIQUE CONSTRAINT, btree (abbrev)
    "state_name_key" UNIQUE CONSTRAINT, btree (name)
Foreign-key constraints:
    "state_birds_id_fkey" FOREIGN KEY (birds_id) REFERENCES birds(birds_id)


In [22]:
!psql -c "SELECT * FROM state" 
      name      | abbrev | birds_id 
----------------+--------+----------
 Illinois       | IL     |        1
 Indiana        | IN     |        1
 Kentucky       | KY     |        1
 North Carolina | NC     |        1
 Ohio           | OH     |        1
 Virginia       | VA     |        1
 West Virginia  | WV     |        1
 Minnesota      | MN     |        2
(8 rows)


  • bug reports
  • test data sets
  • fixes
  • feature requests
In []: