In [1]:
import pandas as pd
import matplotlib.pyplot as plt
In [2]:
df = pd.read_csv("HEALTH_LVNG_13092016124120389.csv")[["Country", "Year", "Value"]]
In [3]:
mean = df[["Country", "Value"]].groupby("Country", sort=False).mean()
mean.columns = ["Mean"]
mean["Rank"] = mean.rank(ascending=False).astype(int)
mean = mean.sort_values(by="Mean", ascending=False).reset_index()
In [4]:
mean[["Rank", "Country", "Mean"]]
Out[4]:
Rank Country Mean
0 1 France 15.951111
1 2 Luxembourg 15.430189
2 3 Portugal 14.474510
3 4 Germany 13.867925
4 5 Spain 13.793878
5 6 Austria 13.596154
6 7 Italy 13.456000
7 8 Slovenia 12.950000
8 9 Switzerland 12.365455
9 10 Lithuania 12.350000
10 11 Hungary 12.253704
11 12 Belgium 11.742000
12 13 Slovak Republic 11.738182
13 14 Czech Republic 11.660000
14 15 Australia 10.955556
15 16 Denmark 10.637037
16 17 Estonia 10.532000
17 18 Korea 10.264815
18 19 Greece 9.993878
19 20 Ireland 9.941818
20 21 New Zealand 9.839583
21 22 United Kingdom 9.294444
22 23 Latvia 9.248485
23 24 Netherlands 9.209091
24 25 United States 9.051852
25 26 Chile 8.985417
26 27 Russia 8.909615
27 28 Poland 8.837037
28 29 Canada 8.683636
29 30 South Africa 8.082692
30 31 Finland 7.694545
31 32 Japan 7.436538
32 33 Sweden 6.512727
33 34 Norway 5.207273
34 35 Colombia 5.180392
35 36 Iceland 4.804000
36 37 Brazil 4.622449
37 38 Mexico 4.545455
38 39 Costa Rica 4.513725
39 40 Israel 3.123529
40 41 China (People's Republic of) 2.540000
41 42 Turkey 1.440000
42 43 India 1.380392
43 44 Indonesia 0.082000
In [5]:
import cufflinks as cf
cf.go_offline()
In [6]:
top = df.merge(mean[0:15], on="Country").pivot(values="Value", columns="Country", index="Year")
In [7]:
top.iplot(kind='scatter', title="Alcohol consumption per capita", xTitle="Year", yTitle="L of ethanol / person")