import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Truncate DataFrame/Series displays to at most 10 rows.
pd.options.display.max_rows = 10
Data source: https://dataverse.harvard.edu/dataset.xhtml?persistentId=hdl:1902.1/12379
# Shell escape: print the dataset notes file that lives one directory up.
!cat ../datasets.md
http://www.transtats.bts.gov/Fields.asp?Table_ID=236 data/ARM.zip and http://nbviewer.ipython.org/urls/umich.box.com/shared/static/zyl08wsmxwoh6ts70v4o.ipynb
# Shell escape: hand the parent directory to the `open` command.
# NOTE(review): `open` is presumably the macOS launcher here — not portable.
!open ..
# Load the voting table; the file is tab-separated and the `date` column is
# parsed to datetime on read.
df = pd.read_csv('../data/undata-213.tab.tsv', sep='\t',
                 parse_dates=['date'])
# DataFrame.info() writes its summary to stdout itself and returns None, so
# wrapping it in print() only appended a stray "dtypes None" to the output.
df.info()
df.head()
<class 'pandas.core.frame.DataFrame'> Int64Index: 1024539 entries, 0 to 1024538 Data columns (total 20 columns): rcid 1024539 non-null int64 session 1024539 non-null int64 date 1024539 non-null datetime64[ns] unres 992822 non-null object vote 1024539 non-null int64 ccode 1024539 non-null int64 uniquename 1024539 non-null object voetenoldcode 1024539 non-null int64 voetenname 1024539 non-null object voetenshortcode 1024539 non-null object cowshortcode 1024539 non-null object cowcode 1024539 non-null int64 cowlongname 1024539 non-null object aclpcode 998484 non-null float64 wdicode 990090 non-null object imfcode 973443 non-null float64 politycode 1024539 non-null int64 bankscode 977640 non-null float64 dpicode 998484 non-null object uncode 991104 non-null float64 dtypes: datetime64[ns](1), float64(4), int64(7), object(8) memory usage: 164.1+ MB dtypes None
rcid | session | date | unres | vote | ccode | uniquename | voetenoldcode | voetenname | voetenshortcode | cowshortcode | cowcode | cowlongname | aclpcode | wdicode | imfcode | politycode | bankscode | dpicode | uncode | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 3 | 1 | 1946-01-01 | R/1/66 | 1 | 2 | United States of America | 2 | United States of America | USA | USA | 2 | United States of America | 65 | USA | 111 | 2 | 1220 | USA | 840 |
1 | 3 | 1 | 1946-01-01 | R/1/66 | 3 | 20 | Canada | 20 | Canada | CAN | CAN | 20 | Canada | 52 | CAN | 156 | 20 | 180 | CAN | 124 |
2 | 3 | 1 | 1946-01-01 | R/1/66 | 9 | 31 | Bahamas | 31 | Bahamas | BHM | BHM | 31 | Bahamas | 49 | BHS | 313 | 31 | 69 | BHS | 44 |
3 | 3 | 1 | 1946-01-01 | R/1/66 | 1 | 40 | Cuba | 40 | Cuba | CUB | CUB | 40 | Cuba | 154 | CUB | 928 | 40 | 280 | CUB | 192 |
4 | 3 | 1 | 1946-01-01 | R/1/66 | 1 | 41 | Haiti | 41 | Haiti | HAI | HAI | 41 | Haiti | 58 | HTI | 263 | 41 | 490 | HTI | 332 |
# Keep only the columns used in the analysis below.
df = df[['rcid', 'session', 'date', 'unres', 'vote', 'uniquename']]
# Restrict to vote codes 1 and 3. The explicit .copy() makes the result an
# independent frame, so the column assignments below do not trigger pandas'
# SettingWithCopyWarning on a filtered selection.
df = df.query("vote in (1, 3)").copy()
x = df.set_index(['rcid', 'unres', 'uniquename'])
# Resolution ids and country names repeat heavily; store them as categoricals.
df['unres'] = df.unres.astype('category')
df['uniquename'] = df.uniquename.astype('category')
# Fraction of resolutions that have at least one US row. The lambda argument
# is named `names` so it no longer shadows the notebook-level `x`.
df.groupby('unres').uniquename.agg(
    lambda names: np.any(names == 'United States of America')).mean()
1.0
# Fully indexed view of the votes: one key per (rcid, unres, uniquename).
x = df.set_index(['rcid', 'unres', 'uniquename'])
# One vote per (unres, uniquename): de-duplicate, index by that pair, and keep
# the first vote where a pair still occurs more than once.
y = (
    df[['unres', 'vote', 'uniquename']]
    .drop_duplicates()
    .set_index(['unres', 'uniquename'])
    .groupby(level=[0, 1])
    .first()
)
# Sanity check: the collapsed index should have no repeated keys.
y.index.is_unique
True
# US vote on each resolution: cross-section of `y` at index level 1.
us = y.xs('United States of America', level=1)
# All remaining rows of `y`, with the US entries dropped.
non_us = y.drop("United States of America", level='uniquename')
# NOTE(review): `same` is not defined anywhere in the visible source —
# presumably per-country flags for "voted the same as the US"; confirm.
# Per country: number of votes counted (`n`) and their mean (`rate`).
r = same.groupby(level='uniquename').vote.agg({'n': 'count', 'rate': 'mean'})
# Display the countries ordered by `rate`.
r.sort('rate')
rate | n | |
---|---|---|
uniquename | ||
North Korea | 0.058167 | 1255 |
Viet Nam | 0.089960 | 2968 |
Angola | 0.120015 | 2708 |
Seychelles | 0.122525 | 1616 |
Zimbabwe | 0.125528 | 2605 |
... | ... | ... |
Micronesia, Federated States of | 0.697598 | 916 |
United Kingdom | 0.711326 | 3523 |
Israel | 0.756575 | 3270 |
Taiwan | 0.756663 | 863 |
Palau | 0.841549 | 568 |
204 rows × 2 columns
# Shaded kernel-density estimate of `rate`, with the x-axis clamped to [0, 1].
density_ax = sns.kdeplot(rate, shade=True)
density_ax.set_xlim(0, 1)
(0, 1)
# The 15 largest rates (US row excluded), drawn as horizontal bars.
highest_rates = rate.order().drop('United States of America').tail(15)
highest_rates.plot(kind='barh', figsize=(5, 10))
<matplotlib.axes._subplots.AxesSubplot at 0x18cb939e8>
# The 15 smallest rates (US row excluded), drawn as horizontal bars.
lowest_rates = rate.order().drop('United States of America').head(15)
lowest_rates.plot(kind='barh', figsize=(5, 10))
<matplotlib.axes._subplots.AxesSubplot at 0x18cf25550>
# Display the US cross-section of `y` (indexed by unres).
us
unres R/1/66 1 R/1/79 3 R/1/98 3 R/1/107 3 R/1/295 3 .. ESS/5/101 1 ESS/5/102 2 ESS/5/41 2 ESS/5/97 1 ESS/5/100 1 Name: vote, dtype: int64
# US vote for every (rcid, unres) pair, taken from the fully indexed frame.
usvotes = x['vote'].xs('United States of America', level='uniquename')
usvotes
rcid unres 3 R/1/66 1 4 R/1/79 3 5 R/1/98 3 6 R/1/107 3 7 R/1/295 3 .. 9052 ESS/5/101 1 9053 ESS/5/102 2 9054 ESS/5/41 2 9055 ESS/5/97 1 9056 ESS/5/100 1 Name: vote, dtype: int64
session | date | vote | |||
---|---|---|---|---|---|
rcid | unres | uniquename | |||
3 | R/1/66 | United States of America | 1 | 1946-01-01 | 1 |
Canada | 1 | 1946-01-01 | 3 | ||
Bahamas | 1 | 1946-01-01 | 9 | ||
Cuba | 1 | 1946-01-01 | 1 | ||
Haiti | 1 | 1946-01-01 | 1 | ||
... | ... | ... | ... | ... | ... |
9056 | ESS/5/100 | Nauru | 22 | 1967-07-05 | 9 |
Marshall Islands | 22 | 1967-07-05 | 9 | ||
Palau | 22 | 1967-07-05 | 9 | ||
Micronesia, Federated States of | 22 | 1967-07-05 | 9 | ||
Samoa | 22 | 1967-07-05 | 9 |
1024539 rows × 3 columns
# Attempt: within each (rcid, unres) group, flag the votes equal to the US vote.
# This raises ValueError("Series lengths must match to compare"): every group
# is compared against the whole `usvotes` Series instead of that group's own
# US vote.
x.groupby(level=[0, 1]).vote.transform(lambda x: x == usvotes)
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) <ipython-input-241-38866779498e> in <module>() ----> 1 x.groupby(level=[0, 1]).vote.transform(lambda x: x == usvotes) /Users/tom.augspurger/Envs/py3/lib/python3.4/site-packages/pandas/core/groupby.py in transform(self, func, *args, **kwargs) 2454 2455 object.__setattr__(group, 'name', name) -> 2456 res = wrapper(group) 2457 2458 if hasattr(res, 'values'): /Users/tom.augspurger/Envs/py3/lib/python3.4/site-packages/pandas/core/groupby.py in <lambda>(x) 2448 result = self._selected_obj.values.copy() 2449 -> 2450 wrapper = lambda x: func(x, *args, **kwargs) 2451 for i, (name, group) in enumerate(self): 2452 if name not in self.indices: <ipython-input-241-38866779498e> in <lambda>(x) ----> 1 x.groupby(level=[0, 1]).vote.transform(lambda x: x == usvotes) /Users/tom.augspurger/Envs/py3/lib/python3.4/site-packages/pandas/core/ops.py in wrapper(self, other, axis) 580 name = _maybe_match_name(self, other) 581 if len(self) != len(other): --> 582 raise ValueError('Series lengths must match to compare') 583 return self._constructor(na_op(self.values, other.values), 584 index=self.index, name=name) ValueError: Series lengths must match to compare
# IPython automagic for %debug: open the post-mortem debugger on the most
# recent traceback.
debug
> /Users/tom.augspurger/Envs/py3/lib/python3.4/site-packages/pandas/core/ops.py(582)wrapper() 581 if len(self) != len(other): --> 582 raise ValueError('Series lengths must match to compare') 583 return self._constructor(na_op(self.values, other.values), ipdb> u > <ipython-input-241-38866779498e>(1)<lambda>() ----> 1 x.groupby(level=[0, 1]).vote.transform(lambda x: x == usvotes) ipdb> d > /Users/tom.augspurger/Envs/py3/lib/python3.4/site-packages/pandas/core/ops.py(582)wrapper() 581 if len(self) != len(other): --> 582 raise ValueError('Series lengths must match to compare') 583 return self._constructor(na_op(self.values, other.values), ipdb> self rcid unres uniquename 3 R/1/66 United States of America 1 Canada 3 Bahamas 9 Cuba 1 Haiti 1 .. Nauru 9 Marshall Islands 9 Palau 9 Micronesia, Federated States of 9 Samoa 9 Name: (3, R/1/66), dtype: int64 ipdb> other rcid unres 3 R/1/66 1 4 R/1/79 3 5 R/1/98 3 6 R/1/107 3 7 R/1/295 3 .. 9052 ESS/5/101 1 9053 ESS/5/102 2 9054 ESS/5/41 2 9055 ESS/5/97 1 9056 ESS/5/100 1 Name: vote, dtype: int64 ipdb> q
# Count the US rows per resolution (index level 1), then tabulate how often
# each count occurs.
us_rows_per_unres = usvotes.groupby(level=1).count()
us_rows_per_unres.value_counts()
1 5034 2 8 dtype: int64
# Attempt: per resolution, the share of votes equal to the US vote.
# This raises KeyError: the bare label 'United States of America' is not found
# in the group's index by `.loc`.
# NOTE(review): presumably because `uniquename` is not the leading index level
# of `x` at this point — confirm.
x.groupby(level='unres').vote.agg(lambda x: (x == x.loc['United States of America']).mean())
--------------------------------------------------------------------------- TypeError Traceback (most recent call last) /Users/tom.augspurger/Envs/py3/lib/python3.4/site-packages/pandas/core/groupby.py in agg_series(self, obj, func) 1583 try: -> 1584 return self._aggregate_series_fast(obj, func) 1585 except Exception: /Users/tom.augspurger/Envs/py3/lib/python3.4/site-packages/pandas/core/groupby.py in _aggregate_series_fast(self, obj, func) 1591 if obj.index._has_complex_internals: -> 1592 raise TypeError('Incompatible index for Cython grouper') 1593 TypeError: Incompatible index for Cython grouper During handling of the above exception, another exception occurred: KeyError Traceback (most recent call last) /Users/tom.augspurger/Envs/py3/lib/python3.4/site-packages/pandas/core/indexing.py in _has_valid_type(self, key, axis) 1286 if not key in ax: -> 1287 error() 1288 except (TypeError) as e: /Users/tom.augspurger/Envs/py3/lib/python3.4/site-packages/pandas/core/indexing.py in error() 1281 raise KeyError("the label [%s] is not in the [%s]" % -> 1282 (key, self.obj._get_axis_name(axis))) 1283 KeyError: 'the label [United States of America] is not in the [index]' During handling of the above exception, another exception occurred: KeyError Traceback (most recent call last) /Users/tom.augspurger/Envs/py3/lib/python3.4/site-packages/pandas/core/groupby.py in aggregate(self, func_or_funcs, *args, **kwargs) 2321 try: -> 2322 return self._python_agg_general(func_or_funcs, *args, **kwargs) 2323 except Exception: /Users/tom.augspurger/Envs/py3/lib/python3.4/site-packages/pandas/core/groupby.py in _python_agg_general(self, func, *args, **kwargs) 1114 try: -> 1115 result, counts = self.grouper.agg_series(obj, f) 1116 output[name] = self._try_cast(result, obj) /Users/tom.augspurger/Envs/py3/lib/python3.4/site-packages/pandas/core/groupby.py in agg_series(self, obj, func) 1585 except Exception: -> 1586 return self._aggregate_series_pure_python(obj, func) 1587 
/Users/tom.augspurger/Envs/py3/lib/python3.4/site-packages/pandas/core/groupby.py in _aggregate_series_pure_python(self, obj, func) 1615 for label, group in splitter: -> 1616 res = func(group) 1617 if result is None: /Users/tom.augspurger/Envs/py3/lib/python3.4/site-packages/pandas/core/groupby.py in <lambda>(x) 1108 func = _intercept_function(func) -> 1109 f = lambda x: func(x, *args, **kwargs) 1110 <ipython-input-186-8959b63664c7> in <lambda>(x) ----> 1 x.groupby(level='unres').vote.agg(lambda x: (x == x.loc['United States of America']).mean()) /Users/tom.augspurger/Envs/py3/lib/python3.4/site-packages/pandas/core/indexing.py in __getitem__(self, key) 1188 else: -> 1189 return self._getitem_axis(key, axis=0) 1190 /Users/tom.augspurger/Envs/py3/lib/python3.4/site-packages/pandas/core/indexing.py in _getitem_axis(self, key, axis) 1332 # fall thru to straight lookup -> 1333 self._has_valid_type(key, axis) 1334 return self._get_label(key, axis=axis) /Users/tom.augspurger/Envs/py3/lib/python3.4/site-packages/pandas/core/indexing.py in _has_valid_type(self, key, axis) 1294 except: -> 1295 error() 1296 /Users/tom.augspurger/Envs/py3/lib/python3.4/site-packages/pandas/core/indexing.py in error() 1281 raise KeyError("the label [%s] is not in the [%s]" % -> 1282 (key, self.obj._get_axis_name(axis))) 1283 KeyError: 'the label [United States of America] is not in the [index]' During handling of the above exception, another exception occurred: KeyError Traceback (most recent call last) /Users/tom.augspurger/Envs/py3/lib/python3.4/site-packages/pandas/core/indexing.py in _has_valid_type(self, key, axis) 1286 if not key in ax: -> 1287 error() 1288 except (TypeError) as e: /Users/tom.augspurger/Envs/py3/lib/python3.4/site-packages/pandas/core/indexing.py in error() 1281 raise KeyError("the label [%s] is not in the [%s]" % -> 1282 (key, self.obj._get_axis_name(axis))) 1283 KeyError: 'the label [United States of America] is not in the [index]' During handling of the above 
exception, another exception occurred: KeyError Traceback (most recent call last) <ipython-input-186-8959b63664c7> in <module>() ----> 1 x.groupby(level='unres').vote.agg(lambda x: (x == x.loc['United States of America']).mean()) /Users/tom.augspurger/Envs/py3/lib/python3.4/site-packages/pandas/core/groupby.py in agg(self, func, *args, **kwargs) 683 @Appender(_agg_doc) 684 def agg(self, func, *args, **kwargs): --> 685 return self.aggregate(func, *args, **kwargs) 686 687 def _iterate_slices(self): /Users/tom.augspurger/Envs/py3/lib/python3.4/site-packages/pandas/core/groupby.py in aggregate(self, func_or_funcs, *args, **kwargs) 2322 return self._python_agg_general(func_or_funcs, *args, **kwargs) 2323 except Exception: -> 2324 result = self._aggregate_named(func_or_funcs, *args, **kwargs) 2325 2326 index = Index(sorted(result), name=self.grouper.names[0]) /Users/tom.augspurger/Envs/py3/lib/python3.4/site-packages/pandas/core/groupby.py in _aggregate_named(self, func, *args, **kwargs) 2409 for name, group in self: 2410 group.name = name -> 2411 output = func(group, *args, **kwargs) 2412 if isinstance(output, (Series, Index, np.ndarray)): 2413 raise Exception('Must produce aggregated value') <ipython-input-186-8959b63664c7> in <lambda>(x) ----> 1 x.groupby(level='unres').vote.agg(lambda x: (x == x.loc['United States of America']).mean()) /Users/tom.augspurger/Envs/py3/lib/python3.4/site-packages/pandas/core/indexing.py in __getitem__(self, key) 1187 return self._getitem_tuple(key) 1188 else: -> 1189 return self._getitem_axis(key, axis=0) 1190 1191 def _getitem_axis(self, key, axis=0): /Users/tom.augspurger/Envs/py3/lib/python3.4/site-packages/pandas/core/indexing.py in _getitem_axis(self, key, axis) 1331 1332 # fall thru to straight lookup -> 1333 self._has_valid_type(key, axis) 1334 return self._get_label(key, axis=axis) 1335 /Users/tom.augspurger/Envs/py3/lib/python3.4/site-packages/pandas/core/indexing.py in _has_valid_type(self, key, axis) 1293 raise 1294 except: 
-> 1295 error() 1296 1297 return True /Users/tom.augspurger/Envs/py3/lib/python3.4/site-packages/pandas/core/indexing.py in error() 1280 "cannot use label indexing with a null key") 1281 raise KeyError("the label [%s] is not in the [%s]" % -> 1282 (key, self.obj._get_axis_name(axis))) 1283 1284 try: KeyError: 'the label [United States of America] is not in the [index]'
# Re-index by (unres, uniquename) only, so `rcid` becomes an ordinary column.
# (A stray `a` inside the call made this line a syntax error.)
x = df.set_index(['unres', 'uniquename'])
# Look up the US rows for one resolution; more than one row coming back means
# the (unres, uniquename) key is not unique.
x.loc['R/1/435', 'United States of America']
/Users/tom.augspurger/Envs/py3/lib/python3.4/site-packages/pandas/core/index.py:5091: PerformanceWarning: indexing past lexsort depth may impact performance. PerformanceWarning)
rcid | session | date | vote | ||
---|---|---|---|---|---|
unres | uniquename | ||||
R/1/435 | United States of America | 14 | 1 | 1946-02-01 | 3 |
United States of America | 15 | 1 | 1946-02-01 | 3 |
# Drop fully identical rows before re-indexing.
dd = df.drop_duplicates()
# The second key was an empty string, which is not a column and raises
# KeyError. 'uniquename' matches the index used for `x`.
# NOTE(review): confirm this is the intended key.
dd.set_index(['unres', 'uniquename'])
# Rows whose (unres, uniquename) key repeats an earlier one. (A trailing
# `d()` fragment made this line a syntax error.)
x[x.index.duplicated()]
rcid | session | date | vote | ||
---|---|---|---|---|---|
unres | uniquename | ||||
R/1/435 | United States of America | 15 | 1 | 1946-02-01 | 3 |
Canada | 15 | 1 | 1946-02-01 | 3 | |
Bahamas | 15 | 1 | 1946-02-01 | 9 | |
Cuba | 15 | 1 | 1946-02-01 | 8 | |
Haiti | 15 | 1 | 1946-02-01 | 8 | |
... | ... | ... | ... | ... | ... |
ESS/5/100 | Nauru | 9056 | 22 | 1967-07-05 | 9 |
Marshall Islands | 9056 | 22 | 1967-07-05 | 9 | |
Palau | 9056 | 22 | 1967-07-05 | 9 | |
Micronesia, Federated States of | 9056 | 22 | 1967-07-05 | 9 | |
Samoa | 9056 | 22 | 1967-07-05 | 9 |
33095 rows × 4 columns
# Bar chart of how often each vote code occurs.
vote_counts = df['vote'].value_counts()
vote_counts.plot(kind='bar', width=.9)
<matplotlib.axes._subplots.AxesSubplot at 0x116c4f898>
# First attempt at loading the descriptions with read_csv defaults. It fails
# with "Error tokenizing data ... Expected 3 fields in line 9, saw 4": no
# `sep` is given, so the rows are split on commas.
desc = pd.read_csv("../data/descriptions-213.tab.tsv")
--------------------------------------------------------------------------- CParserError Traceback (most recent call last) <ipython-input-25-59ac8dfcf55d> in <module>() ----> 1 desc = pd.read_csv("../data/descriptions-213.tab.tsv") /Users/tom.augspurger/Envs/py3/lib/python3.4/site-packages/pandas/io/parsers.py in parser_f(filepath_or_buffer, sep, dialect, compression, doublequote, escapechar, quotechar, quoting, skipinitialspace, lineterminator, header, index_col, names, prefix, skiprows, skipfooter, skip_footer, na_values, na_fvalues, true_values, false_values, delimiter, converters, dtype, usecols, engine, delim_whitespace, as_recarray, na_filter, compact_ints, use_unsigned, low_memory, buffer_lines, warn_bad_lines, error_bad_lines, keep_default_na, thousands, comment, decimal, parse_dates, keep_date_col, dayfirst, date_parser, memory_map, float_precision, nrows, iterator, chunksize, verbose, encoding, squeeze, mangle_dupe_cols, tupleize_cols, infer_datetime_format, skip_blank_lines) 472 skip_blank_lines=skip_blank_lines) 473 --> 474 return _read(filepath_or_buffer, kwds) 475 476 parser_f.__name__ = name /Users/tom.augspurger/Envs/py3/lib/python3.4/site-packages/pandas/io/parsers.py in _read(filepath_or_buffer, kwds) 258 return parser 259 --> 260 return parser.read() 261 262 _parser_defaults = { /Users/tom.augspurger/Envs/py3/lib/python3.4/site-packages/pandas/io/parsers.py in read(self, nrows) 719 raise ValueError('skip_footer not supported for iteration') 720 --> 721 ret = self._engine.read(nrows) 722 723 if self.options.get('as_recarray'): /Users/tom.augspurger/Envs/py3/lib/python3.4/site-packages/pandas/io/parsers.py in read(self, nrows) 1168 1169 try: -> 1170 data = self._reader.read(nrows) 1171 except StopIteration: 1172 if nrows is None: pandas/parser.pyx in pandas.parser.TextReader.read (pandas/parser.c:7544)() pandas/parser.pyx in pandas.parser.TextReader._read_low_memory (pandas/parser.c:7784)() pandas/parser.pyx in pandas.parser.TextReader._read_rows 
(pandas/parser.c:8401)() pandas/parser.pyx in pandas.parser.TextReader._tokenize_rows (pandas/parser.c:8275)() pandas/parser.pyx in pandas.parser.raise_parser_error (pandas/parser.c:20691)() CParserError: Error tokenizing data. C error: Expected 3 fields in line 9, saw 4
# Shell escape: show the first ten raw lines of the file to inspect its layout.
!head -n 10 ../data/descriptions-213.tab.tsv
session rcid abstain yes no importantvote date unres amend para short descr me nu di hr co ec 1 3 4 29 18 "." 1946-01-01 "R/1/66" 1 0 "AMENDMENTS, RULES OF PROCEDURE" "TO ADOPT A CUBAN AMENDMENT TO THE UK PROPOSAL REFERRING THE PROVISIONAL RULES OF PROCEDURE AND ANY AMENDMENTS THEREOF TO THE 6TH COMMITTEE, SAID AMENDMENT PRESCRIBING A 1-WEEK TIME LIMIT WITHIN WHICH THE 6TH COMM. MUST SUBMIT ITS REPORT ON THE" 0 0 0 0 0 0 1 4 8 9 34 "." 1946-01-02 "R/1/79" 0 0 "SECURITY COUNCIL ELECTIONS" "TO ADOPT A USSR PROPOSAL ADJOURNING DEBATE ON AND POSTPONINGELECTIONS OF THE NON-PERMANENT MEMBERS OF THE SECURITY COUNCIL, TO THE FOLLOWING WEEK." 0 0 0 0 0 0 1 5 1 28 22 "." 1946-01-04 "R/1/98" 0 0 "VOTING PROCEDURE" "TO ADOPT THE KOREAN PROPOSAL THAT INVALID BALLOTS BE INCLUDED IN THE TOTAL NUMBER OF \MEMBERS PRESENT AND VOTING\\", IN CALCULATING THE MAJORITY VOTE.\"" 0 0 0 0 0 0 1 6 10 12 27 "." 1946-01-04 "R/1/107" 0 0 "DECLARATION OF HUMAN RIGHTS" "TO ADOPT A CUBAN PROPOSAL (A/3-C) THAT AN ITEM ON A DECLARATION OF THE RIGHTS AND DUTIES OF MAN BE TABLED." 0 0 0 1 0 0 1 7 0 25 18 "." 1946-01-02 "R/1/295" 1 0 "GENERAL ASSEMBLY ELECTIONS" "TO ADOPT A 6TH COMMITTEE AMENDMENT (A/14) TO THE PROVISIONAL RULES OF PROCEDURE, WHICH AMENDMENT PROVIDES THAT RULE 73 SHOULD END WITH:\THERE SHALL BE NO NOMINATIONS.\\"" 0 0 0 0 0 0 1 8 2 38 1 "." 1946-01-05 "R/1/297" 1 0 "ECOSOC POWERS" "TO ADOPT A SECOND 6TH COMM. AMENDMENT (A/14) TO THE PROVISIONAL RULES OF PROCEDURE, WHICH AMENDMENT EPLACES PROVISIONAL RULE T WITH A NEW TEXT AUTHORIZING THE ECONOMIC & SOC. COUNCIL TO CALL INTERNATIONAL CONFERENCES ON ANY MATTER WITHIN ITS CO" 0 0 0 0 0 1 1 9 2 45 0 "." 1946-02-05 "R/1/329" 0 0 "POST-WAR RECONSTRUCTION" "TO OPEN THE DISCUSSION ON THE POLISH DRAFT RESOLUTION (A/22) PROVIDING FOR ECO. & SOC. COUNCIL STUDY AND ASSEMBLY DISCUSSION OF THE RECONSTRUCTION OF U.N. MEMBER-COUNTRIES DEVASTATED BY THE WAR." 0 0 0 0 0 0 1 10 0 46 2 "." 1946-02-05 "R/1/361" 1 1 "U.N. 
MEMBERS, RELATIONS WITH SPAIN" "TO ADOPT GENERAL COMM. DRAFT RESOLUTION (A/40) AS AMENDED BY A NORWEGIAN ORAL PROPOSAL (REPLACING \TAKE INTO ACCOUNT THE LETTER AND SPIRIT\\" IN 3RD PARAGRAPH WITH, \\"ACT IN ACCORDANCE WITH THE LETTER AND THE SPIRIT\\"), RECOMMENDING UN MEMBERS T" 0 0 0 0 0 0 1 11 0 41 0 "." 1946-02-05 "R/1/376" 0 0 "TRUSTEESHIP AMENDMENTS" "TO ADOPT DRAFT RESOLUTIONS I AND II AS A WHOLE, OF THE 4TH COMM. REPORT (A/34) ON NON-SELF-GOVERNING TERRITORIES. RESOLUTION I PROVIDES TO PROMOTE THE POLITICAL, SOCIAL, ECONOMIC, AND EDUCATIONAL ASPIRATIONS OF NON-SELF-GOVERNING PEOPLES THROUG" 0 0 0 0 1 0
Go to the command line!
# Shell escape: pipe the first ten raw lines into `mvim`, reading from stdin.
# NOTE(review): `mvim` is presumably MacVim — requires it to be installed.
!head -n 10 ../data/descriptions-213.tab.tsv | mvim - # may not work for you
# Preview the first ten rows as tab-separated data; malformed lines are
# skipped (with a message) instead of raising.
pd.read_csv(
    '../data/descriptions-213.tab.tsv',
    sep='\t',
    nrows=10,
    error_bad_lines=False,
)
b'Skipping line 6: expected 18 fields, saw 24\n'
session | rcid | abstain | yes | no | importantvote | date | unres | amend | para | short | descr | me | nu | di | hr | co | ec | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 3 | 4 | 29 | 18 | . | 1946-01-01 | R/1/66 | 1 | 0 | AMENDMENTS, RULES OF PROCEDURE | TO ADOPT A CUBAN AMENDMENT TO THE UK PROPOSAL ... | 0 | 0 | 0 | 0 | 0 | 0 |
1 | 1 | 4 | 8 | 9 | 34 | . | 1946-01-02 | R/1/79 | 0 | 0 | SECURITY COUNCIL ELECTIONS | TO ADOPT A USSR PROPOSAL ADJOURNING DEBATE ON ... | 0 | 0 | 0 | 0 | 0 | 0 |
2 | 1 | 5 | 1 | 28 | 22 | . | 1946-01-04 | R/1/98 | 0 | 0 | VOTING PROCEDURE | TO ADOPT THE KOREAN PROPOSAL THAT INVALID BALL... | 0 | 0 | 0 | 0 | 0 | 0 |
3 | 1 | 6 | 10 | 12 | 27 | . | 1946-01-04 | R/1/107 | 0 | 0 | DECLARATION OF HUMAN RIGHTS | TO ADOPT A CUBAN PROPOSAL (A/3-C) THAT AN ITEM... | 0 | 0 | 0 | 1 | 0 | 0 |
4 | 1 | 9 | 2 | 45 | 0 | . | 1946-02-05 | R/1/329 | 0 | 0 | POST-WAR RECONSTRUCTION | TO OPEN THE DISCUSSION ON THE POLISH DRAFT RES... | 0 | 0 | 0 | 0 | 0 | 0 |
5 | 1 | 10 | 0 | 46 | 2 | . | 1946-02-05 | R/1/361 | 1 | 1 | U.N. MEMBERS, RELATIONS WITH SPAIN | TO ADOPT GENERAL COMM. DRAFT RESOLUTION (A/40)... | 0 | 0 | 0 | 0 | 0 | 0 |
6 | 1 | 11 | 0 | 41 | 0 | . | 1946-02-05 | R/1/376 | 0 | 0 | TRUSTEESHIP AMENDMENTS | TO ADOPT DRAFT RESOLUTIONS I AND II AS A WHOLE... | 0 | 0 | 0 | 0 | 1 | 0 |
7 | 1 | 12 | 3 | 21 | 22 | . | 1946-02-06 | R/1/394 | 1 | 1 | COUNCIL MEMBER TERM LENGTH | TO ADOPT PARAGRAPH (A) OF THE 6TH COMM. DRAFT ... | 0 | 0 | 0 | 0 | 0 | 0 |
8 | 1 | 13 | 2 | 10 | 31 | . | 1946-02-01 | R/1/434 | 1 | 1 | PROPAGANDA, REFUGEE CAMPS | TO ADOPT USSR AMMENDMENT (A/C.3/19) TO A 3RD C... | 0 | 0 | 0 | 0 | 0 | 0 |
9 | 1 | 14 | 5 | 7 | 30 | . | 1946-02-01 | R/1/435 | 1 | 1 | PERSONNEL, REFUGEE CAMPS | TO ADOPT A USSR AMENDMENT (A/C.3/19) TO 3RD CO... | 0 | 0 | 0 | 0 | 0 | 0 |
# Load the descriptions as tab-separated text with quote handling disabled,
# so embedded quote characters cannot shift field boundaries. Lines that are
# still malformed are skipped, and `date` is parsed to datetime.
desc = pd.read_csv(
    "../data/descriptions-213.tab.tsv",
    sep='\t',
    quoting=3,  # numeric value of csv.QUOTE_NONE
    error_bad_lines=False,
    parse_dates=['date'],
)
desc.head()
b'Skipping line 1428: expected 18 fields, saw 24\n'
session | rcid | abstain | yes | no | importantvote | date | unres | amend | para | short | descr | me | nu | di | hr | co | ec | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 3 | 4 | 29 | 18 | "." | 1946-01-01 | "R/1/66" | 1 | 0 | "AMENDMENTS, RULES OF PROCEDURE" | "TO ADOPT A CUBAN AMENDMENT TO THE UK PROPOSAL... | 0 | 0 | 0 | 0 | 0 | 0 |
1 | 1 | 4 | 8 | 9 | 34 | "." | 1946-01-02 | "R/1/79" | 0 | 0 | "SECURITY COUNCIL ELECTIONS" | "TO ADOPT A USSR PROPOSAL ADJOURNING DEBATE ON... | 0 | 0 | 0 | 0 | 0 | 0 |
2 | 1 | 5 | 1 | 28 | 22 | "." | 1946-01-04 | "R/1/98" | 0 | 0 | "VOTING PROCEDURE" | "TO ADOPT THE KOREAN PROPOSAL THAT INVALID BAL... | 0 | 0 | 0 | 0 | 0 | 0 |
3 | 1 | 6 | 10 | 12 | 27 | "." | 1946-01-04 | "R/1/107" | 0 | 0 | "DECLARATION OF HUMAN RIGHTS" | "TO ADOPT A CUBAN PROPOSAL (A/3-C) THAT AN ITE... | 0 | 0 | 0 | 1 | 0 | 0 |
4 | 1 | 7 | 0 | 25 | 18 | "." | 1946-01-02 | "R/1/295" | 1 | 0 | "GENERAL ASSEMBLY ELECTIONS" | "TO ADOPT A 6TH COMMITTEE AMENDMENT (A/14) TO ... | 0 | 0 | 0 | 0 | 0 | 0 |
# With quoting disabled the text fields keep their literal double quotes;
# strip them from every object-dtype column.
is_text_column = (desc.dtypes == object).values
strcols = desc.columns[is_text_column]
desc[strcols] = desc[strcols].apply(lambda col: col.str.strip('"'))
# Inspect the distinct raw values before recoding the column.
desc.importantvote.unique()
array(['.', '1', '0'], dtype=object)
desc['importantvote'] = desc.importantvote.map({'.': np.nan, '1': True, '0': False})
df['unres'] = df.unres.astype('category')
%timeit df.groupby('unres').vote.nunique()
1 loops, best of 3: 437 ms per loop
Number of resolutions per year?
# Distinct resolutions per calendar year, drawn as a line plot.
votes_by_year = df.groupby(df['date'].dt.year)
votes_by_year['unres'].nunique().plot()
<matplotlib.axes._subplots.AxesSubplot at 0x13c4d3e10>
# Build a GroupBy keyed on (date, unres); no aggregation is applied here.
# NOTE(review): the table recorded after this cell looks like a plain
# DataFrame display, so the saved output may be stale — confirm.
df.groupby(['date', 'unres'])
rcid | session | date | unres | vote | uniquename | |
---|---|---|---|---|---|---|
0 | 3 | 1 | 1946-01-01 | R/1/66 | 1 | United States of America |
1 | 3 | 1 | 1946-01-01 | R/1/66 | 3 | Canada |
2 | 3 | 1 | 1946-01-01 | R/1/66 | 9 | Bahamas |
3 | 3 | 1 | 1946-01-01 | R/1/66 | 1 | Cuba |
4 | 3 | 1 | 1946-01-01 | R/1/66 | 1 | Haiti |
... | ... | ... | ... | ... | ... | ... |
1024534 | 9056 | 22 | 1967-07-05 | ESS/5/100 | 9 | Nauru |
1024535 | 9056 | 22 | 1967-07-05 | ESS/5/100 | 9 | Marshall Islands |
1024536 | 9056 | 22 | 1967-07-05 | ESS/5/100 | 9 | Palau |
1024537 | 9056 | 22 | 1967-07-05 | ESS/5/100 | 9 | Micronesia, Federated States of |
1024538 | 9056 | 22 | 1967-07-05 | ESS/5/100 | 9 | Samoa |
1024539 rows × 6 columns
# Show the first five rows of the votes frame.
df.head()
rcid | session | date | unres | vote | uniquename | |
---|---|---|---|---|---|---|
0 | 3 | 1 | 1946-01-01 | R/1/66 | 1 | United States of America |
1 | 3 | 1 | 1946-01-01 | R/1/66 | 3 | Canada |
2 | 3 | 1 | 1946-01-01 | R/1/66 | 9 | Bahamas |
3 | 3 | 1 | 1946-01-01 | R/1/66 | 1 | Cuba |
4 | 3 | 1 | 1946-01-01 | R/1/66 | 1 | Haiti |
# Display the cleaned descriptions frame (truncated by display.max_rows).
desc
session | rcid | abstain | yes | no | importantvote | date | unres | amend | para | short | descr | me | nu | di | hr | co | ec | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 3 | 4 | 29 | 18 | NaN | 1946-01-01 | R/1/66 | 1 | 0 | AMENDMENTS, RULES OF PROCEDURE | TO ADOPT A CUBAN AMENDMENT TO THE UK PROPOSAL ... | 0 | 0 | 0 | 0 | 0 | 0 |
1 | 1 | 4 | 8 | 9 | 34 | NaN | 1946-01-02 | R/1/79 | 0 | 0 | SECURITY COUNCIL ELECTIONS | TO ADOPT A USSR PROPOSAL ADJOURNING DEBATE ON ... | 0 | 0 | 0 | 0 | 0 | 0 |
2 | 1 | 5 | 1 | 28 | 22 | NaN | 1946-01-04 | R/1/98 | 0 | 0 | VOTING PROCEDURE | TO ADOPT THE KOREAN PROPOSAL THAT INVALID BALL... | 0 | 0 | 0 | 0 | 0 | 0 |
3 | 1 | 6 | 10 | 12 | 27 | NaN | 1946-01-04 | R/1/107 | 0 | 0 | DECLARATION OF HUMAN RIGHTS | TO ADOPT A CUBAN PROPOSAL (A/3-C) THAT AN ITEM... | 0 | 0 | 0 | 1 | 0 | 0 |
4 | 1 | 7 | 0 | 25 | 18 | NaN | 1946-01-02 | R/1/295 | 1 | 0 | GENERAL ASSEMBLY ELECTIONS | TO ADOPT A 6TH COMMITTEE AMENDMENT (A/14) TO T... | 0 | 0 | 0 | 0 | 0 | 0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
5205 | 22 | 9052 | 2 | 116 | 0 | NaN | 1967-07-02 | ESS/5/101 | 0 | 0 | ISRAEL, UNRWA | TO ADOPT DRAFT RESOL. (A/L.526/ADD.3) WELCOMIN... | 1 | 0 | 0 | 0 | 0 | 0 |
5206 | 22 | 9053 | 20 | 99 | 0 | NaN | 1967-07-02 | ESS/5/102 | 0 | 0 | JERUSALEM, STATUS | TO ADOPT DRAFT RESOL. (A/L.527/REV.1) CALLING ... | 1 | 0 | 0 | 0 | 0 | 0 |
5207 | 22 | 9054 | 18 | 99 | 0 | NaN | 1967-07-05 | ESS/5/41 | 0 | 0 | JERUSALEM, STATUS | TO ADOPT DRAFT RESOL. (A/528/REV.2) DEPLORING ... | 1 | 0 | 0 | 0 | 0 | 0 |
5208 | 22 | 9055 | 27 | 62 | 27 | NaN | 1967-07-05 | ESS/5/97 | 0 | 1 | SPECIAL SESSION ADJOURNMENT | TO ADOPT OPERATIVE PARAG. 1 OF DRAFT RESOL. (A... | 0 | 0 | 0 | 0 | 0 | 0 |
5209 | 22 | 9056 | 27 | 63 | 26 | NaN | 1967-07-05 | ESS/5/100 | 0 | 0 | SPECIAL SESSION, SECURITY COUNCIL | TO ADOPT DRAFT RESOL. (A/L.529/REV.1) FORWARDI... | 1 | 0 | 0 | 0 | 0 | 0 |
5210 rows × 18 columns