In [1]:
import pandas as pd
In [2]:
df = pd.read_csv("http://math-info.hse.ru/f/2018-19/spb-python/movie_metadata.csv")
In [3]:
df.columns
Out[3]:
Index(['color', 'director_name', 'num_critic_for_reviews', 'duration',
       'director_facebook_likes', 'actor_3_facebook_likes', 'actor_2_name',
       'actor_1_facebook_likes', 'gross', 'genres', 'actor_1_name',
       'movie_title', 'num_voted_users', 'cast_total_facebook_likes',
       'actor_3_name', 'facenumber_in_poster', 'plot_keywords',
       'movie_imdb_link', 'num_user_for_reviews', 'language', 'country',
       'content_rating', 'budget', 'title_year', 'actor_2_facebook_likes',
       'imdb_score', 'aspect_ratio', 'movie_facebook_likes'],
      dtype='object')
In [4]:
df = df[:4000]
In [5]:
df.mean()
Out[5]:
num_critic_for_reviews       1.548648e+02
duration                     1.098988e+02
director_facebook_likes      7.791808e+02
actor_3_facebook_likes       7.467102e+02
actor_1_facebook_likes       7.383797e+03
gross                        5.476311e+07
num_voted_users              9.785604e+04
cast_total_facebook_likes    1.103358e+04
facenumber_in_poster         1.405710e+00
num_user_for_reviews         3.100935e+02
budget                       4.771988e+07
title_year                   2.003095e+03
actor_2_facebook_likes       1.934134e+03
imdb_score                   6.442275e+00
aspect_ratio                 2.222510e+00
movie_facebook_likes         8.679390e+03
dtype: float64
In [6]:
df.describe()
Out[6]:
num_critic_for_reviews duration director_facebook_likes actor_3_facebook_likes actor_1_facebook_likes gross num_voted_users cast_total_facebook_likes facenumber_in_poster num_user_for_reviews budget title_year actor_2_facebook_likes imdb_score aspect_ratio movie_facebook_likes
count 3986.000000 3993.000000 3917.000000 3992.000000 4000.000000 3.562000e+03 4.000000e+03 4000.000000 3993.000000 3991.000000 3.696000e+03 3913.000000 3996.000000 4000.000000 3885.000000 4000.000000
mean 154.864777 109.898823 779.180751 746.710170 7383.797500 5.476311e+07 9.785604e+04 11033.578250 1.405710 310.093460 4.771988e+07 2003.095068 1934.134384 6.442275 2.222510 8679.390000
std 123.465109 25.926688 3013.754789 1833.629629 14554.050377 7.104755e+07 1.485450e+05 18143.364665 2.052587 400.012601 2.272561e+08 10.399026 4401.008671 1.085949 1.206606 20929.279758
min 1.000000 7.000000 0.000000 0.000000 0.000000 1.620000e+02 6.000000e+00 0.000000 0.000000 1.000000 1.300000e+04 1927.000000 0.000000 1.600000 1.180000 0.000000
25% 64.000000 95.000000 10.000000 188.000000 741.750000 1.054060e+07 1.523075e+04 1875.500000 0.000000 91.000000 1.200000e+07 1999.000000 379.500000 5.800000 1.850000 0.000000
50% 125.000000 106.000000 58.000000 433.000000 1000.000000 3.203155e+07 4.623000e+04 3897.500000 1.000000 186.000000 2.500000e+07 2005.000000 680.000000 6.500000 2.350000 206.500000
75% 211.000000 120.000000 226.000000 688.000000 12000.000000 7.011318e+07 1.144435e+05 15765.250000 2.000000 371.000000 5.200000e+07 2010.000000 968.000000 7.200000 2.350000 10000.000000
max 813.000000 511.000000 23000.000000 23000.000000 640000.000000 7.605058e+08 1.689764e+06 656730.000000 43.000000 5060.000000 1.221550e+10 2016.000000 137000.000000 9.500000 16.000000 349000.000000
In [8]:
df.dtypes
Out[8]:
color                         object
director_name                 object
num_critic_for_reviews       float64
duration                     float64
director_facebook_likes      float64
actor_3_facebook_likes       float64
actor_2_name                  object
actor_1_facebook_likes       float64
gross                        float64
genres                        object
actor_1_name                  object
movie_title                   object
num_voted_users                int64
cast_total_facebook_likes      int64
actor_3_name                  object
facenumber_in_poster         float64
plot_keywords                 object
movie_imdb_link               object
num_user_for_reviews         float64
language                      object
country                       object
content_rating                object
budget                       float64
title_year                   float64
actor_2_facebook_likes       float64
imdb_score                   float64
aspect_ratio                 float64
movie_facebook_likes           int64
dtype: object
In [7]:
df.describe(include=['object'])
Out[7]:
color director_name actor_2_name genres actor_1_name movie_title actor_3_name plot_keywords movie_imdb_link language country content_rating
count 3989 3917 3996 4000 4000 4000 3992 3940 4000 3994 3996 3882
unique 2 1752 2312 803 1525 3921 2741 3863 3923 35 51 18
top Color Steven Spielberg Morgan Freeman Comedy Robert De Niro Pan Steve Coogan eighteen wheeler|illegal street racing|truck|t... http://www.imdb.com/title/tt3332064/?ref_=fn_t... English USA R
freq 3874 26 19 161 48 3 8 3 3 3781 3036 1665
In [10]:
df['director_name']
Out[10]:
0             James Cameron
1            Gore Verbinski
2                Sam Mendes
3         Christopher Nolan
4               Doug Walker
5            Andrew Stanton
6                 Sam Raimi
7              Nathan Greno
8               Joss Whedon
9               David Yates
10              Zack Snyder
11             Bryan Singer
12             Marc Forster
13           Gore Verbinski
14           Gore Verbinski
15              Zack Snyder
16           Andrew Adamson
17              Joss Whedon
18             Rob Marshall
19         Barry Sonnenfeld
20            Peter Jackson
21                Marc Webb
22             Ridley Scott
23            Peter Jackson
24              Chris Weitz
25            Peter Jackson
26            James Cameron
27            Anthony Russo
28               Peter Berg
29          Colin Trevorrow
               ...         
3970         Victor Fleming
3971        Richard Raymond
3972          Sam Peckinpah
3973            Alan Jacobs
3974          George Sidney
3975     Christopher Morris
3976    Andrucha Waddington
3977        Peter Stebbings
3978      Vincente Minnelli
3979             Dagur Kári
3980        Nicholas Hytner
3981     Jaume Collet-Serra
3982                    NaN
3983       John Schlesinger
3984         Peter Cattaneo
3985           Jim Abrahams
3986         Kevin Carraway
3987           F. Gary Gray
3988          Albert Hughes
3989                    NaN
3990        Michael Gornick
3991          Sam Peckinpah
3992         Richard Brooks
3993         Fred Zinnemann
3994            Jack Smight
3995       Alfred Hitchcock
3996          Robert Eggers
3997         Michael Martin
3998           Edward Burns
3999            Bill Condon
Name: director_name, Length: 4000, dtype: object
In [11]:
df.director_name
Out[11]:
0             James Cameron
1            Gore Verbinski
2                Sam Mendes
3         Christopher Nolan
4               Doug Walker
5            Andrew Stanton
6                 Sam Raimi
7              Nathan Greno
8               Joss Whedon
9               David Yates
10              Zack Snyder
11             Bryan Singer
12             Marc Forster
13           Gore Verbinski
14           Gore Verbinski
15              Zack Snyder
16           Andrew Adamson
17              Joss Whedon
18             Rob Marshall
19         Barry Sonnenfeld
20            Peter Jackson
21                Marc Webb
22             Ridley Scott
23            Peter Jackson
24              Chris Weitz
25            Peter Jackson
26            James Cameron
27            Anthony Russo
28               Peter Berg
29          Colin Trevorrow
               ...         
3970         Victor Fleming
3971        Richard Raymond
3972          Sam Peckinpah
3973            Alan Jacobs
3974          George Sidney
3975     Christopher Morris
3976    Andrucha Waddington
3977        Peter Stebbings
3978      Vincente Minnelli
3979             Dagur Kári
3980        Nicholas Hytner
3981     Jaume Collet-Serra
3982                    NaN
3983       John Schlesinger
3984         Peter Cattaneo
3985           Jim Abrahams
3986         Kevin Carraway
3987           F. Gary Gray
3988          Albert Hughes
3989                    NaN
3990        Michael Gornick
3991          Sam Peckinpah
3992         Richard Brooks
3993         Fred Zinnemann
3994            Jack Smight
3995       Alfred Hitchcock
3996          Robert Eggers
3997         Michael Martin
3998           Edward Burns
3999            Bill Condon
Name: director_name, Length: 4000, dtype: object
In [9]:
df[df.director_name.isna()]
Out[9]:
color director_name num_critic_for_reviews duration director_facebook_likes actor_3_facebook_likes actor_2_name actor_1_facebook_likes gross genres ... num_user_for_reviews language country content_rating budget title_year actor_2_facebook_likes imdb_score aspect_ratio movie_facebook_likes
177 Color NaN 21.0 60.0 NaN 184.0 Philip Michael Thomas 982.0 NaN Action|Crime|Drama|Mystery|Thriller ... 74.0 English USA TV-14 1500000.0 NaN 321.0 7.5 1.33 0
260 Color NaN 29.0 60.0 NaN 432.0 Dirk Benedict 669.0 NaN Action|Adventure|Crime ... 97.0 English USA TV-PG NaN NaN 554.0 7.6 4.00 0
404 Color NaN 103.0 44.0 NaN 148.0 Scott Thompson 544.0 NaN Crime|Drama|Horror|Mystery|Thriller ... 270.0 English USA TV-14 NaN NaN 183.0 8.6 1.78 59000
459 NaN NaN 95.0 54.0 NaN 0.0 Royce Johnson 577.0 NaN Action|Adventure|Crime|Drama|Sci-Fi|Thriller ... 394.0 English USA TV-MA NaN NaN 4.0 8.8 16.00 55000
479 Black and White NaN 31.0 25.0 NaN 474.0 Agnes Moorehead 1000.0 NaN Comedy|Family|Fantasy ... 71.0 English USA TV-G NaN NaN 960.0 7.6 4.00 0
537 Color NaN 12.0 43.0 NaN 218.0 Matt Ryan 1000.0 NaN Drama|Fantasy|Horror|Thriller ... 94.0 English USA TV-14 NaN NaN 560.0 7.5 16.00 10000
543 Color NaN 12.0 45.0 NaN 0.0 Brent Sexton 374.0 NaN Crime|Drama|Mystery ... 67.0 English USA NaN NaN NaN 130.0 8.3 NaN 0
593 Color NaN 39.0 45.0 NaN 160.0 Katia Winter 898.0 NaN Adventure|Drama|Fantasy|Mystery|Thriller ... 110.0 English USA TV-14 NaN NaN 372.0 7.5 16.00 14000
645 Color NaN 4.0 30.0 NaN 359.0 Kaitlyn Dever 995.0 NaN Comedy ... 87.0 English USA TV-PG NaN NaN 363.0 7.4 16.00 0
685 Color NaN 14.0 60.0 NaN 575.0 James Nesbitt 1000.0 NaN Crime|Drama|Mystery ... 28.0 English UK NaN NaN NaN 773.0 8.1 NaN 0
757 Color NaN 8.0 22.0 NaN 402.0 Oliver Hudson 732.0 NaN Comedy|Romance ... 41.0 English USA TV-PG NaN NaN 607.0 7.3 1.78 0
816 Color NaN 20.0 22.0 NaN 271.0 Soleil Moon Frye 870.0 NaN Comedy|Family|Fantasy ... 94.0 English USA TV-G 3000000.0 NaN 558.0 6.6 1.33 990
826 Color NaN 46.0 30.0 NaN 479.0 Kristin Davis 962.0 NaN Comedy|Romance ... 238.0 English USA TV-MA NaN NaN 722.0 7.0 1.33 0
833 Color NaN 26.0 22.0 NaN 676.0 Noureen DeWulf 883.0 NaN Comedy|Romance ... 54.0 English USA NaN NaN NaN 701.0 6.7 16.00 0
857 Color NaN 77.0 44.0 NaN 72.0 Don S. Davis 847.0 NaN Action|Adventure|Drama|Sci-Fi ... 181.0 English USA TV-14 1400000.0 NaN 440.0 8.4 1.33 0
962 Color NaN 2.0 45.0 NaN 132.0 Gemma Jones 416.0 NaN Crime|Drama ... 9.0 English UK NaN NaN NaN 171.0 7.9 NaN 0
994 Color NaN 14.0 105.0 NaN 5.0 Bruce Alexander 325.0 NaN Crime|Drama|Mystery ... 33.0 English UK NaN NaN NaN 7.0 7.8 1.33 361
1007 Color NaN 4.0 60.0 NaN 398.0 Brittany Curran 629.0 NaN Drama|Mystery|Thriller ... 22.0 English USA NaN NaN NaN 512.0 7.5 16.00 915
1023 Color NaN 28.0 43.0 NaN 600.0 Tony Curran 3000.0 NaN Action|Drama|Sci-Fi ... 149.0 English USA TV-MA NaN NaN 845.0 7.0 1.78 0
1035 Color NaN 30.0 64.0 NaN 184.0 Graham McTavish 1000.0 NaN Drama|Romance|Sci-Fi ... 148.0 English USA TV-MA NaN NaN 531.0 8.5 16.00 31000
1133 Color NaN 23.0 52.0 NaN 114.0 Clotilde Hesme 164.0 NaN Drama|Fantasy|Horror|Mystery ... 47.0 French France TV-MA NaN NaN 116.0 8.3 16.00 17000
1175 Black and White NaN 4.0 30.0 NaN 253.0 Gavin MacLeod 870.0 NaN Comedy|War ... 18.0 English USA TV-G NaN NaN 284.0 7.5 4.00 455
1265 Color NaN 3.0 30.0 NaN 12.0 Melissa Altro 51.0 NaN Animation|Comedy|Family ... 43.0 English Canada TV-Y NaN NaN 21.0 7.4 1.33 301
1346 Color NaN 22.0 60.0 NaN 501.0 Wayne Knight 23000.0 NaN Comedy|Family|Sci-Fi ... 86.0 English USA TV-PG NaN NaN 967.0 7.8 1.33 0
1404 Color NaN 5.0 43.0 NaN 452.0 Aimee Garcia 778.0 NaN Action|Comedy|Crime ... 30.0 English USA TV-14 NaN NaN 618.0 5.8 16.00 0
1444 Color NaN 3.0 60.0 NaN 628.0 Kimberly Elise 897.0 NaN Drama|Romance ... 11.0 English USA NaN NaN NaN 637.0 7.0 NaN 265
1499 Color NaN 51.0 60.0 NaN 256.0 Indira Varma 2000.0 NaN Crime|Drama|Mystery|Thriller ... 105.0 English UK TV-MA NaN NaN 729.0 8.6 16.00 20000
1620 Color NaN 51.0 44.0 NaN 690.0 Aimee Teegarden 971.0 NaN Drama|Sport ... 257.0 English USA TV-14 NaN NaN 741.0 8.7 1.78 0
1659 Color NaN 9.0 60.0 NaN 468.0 Joan Allen 971.0 NaN Drama|Mystery ... 30.0 English USA TV-PG NaN NaN 805.0 7.5 16.00 0
1664 Color NaN 52.0 28.0 NaN 576.0 Kevin Connolly 680.0 NaN Comedy|Drama ... 145.0 English USA TV-MA NaN NaN 638.0 8.5 1.78 0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
2775 Color NaN 17.0 60.0 NaN 84.0 Steve Gonsalves 155.0 NaN Documentary ... 57.0 English USA NaN NaN NaN 130.0 6.6 NaN 373
2824 Color NaN 53.0 55.0 NaN 2.0 Olaf Lubaszenko 20.0 447093.0 Drama ... 37.0 Polish Poland TV-MA NaN NaN 3.0 9.1 1.33 0
2828 Color NaN 1.0 41.0 NaN 2.0 Marian Dziedziel 70.0 NaN Action|Crime|Drama|Thriller ... 2.0 Polish Poland NaN NaN NaN 2.0 7.4 NaN 64
2952 Color NaN 68.0 55.0 NaN 898.0 Kelvin Taylor 1000.0 NaN Action|Adventure|Biography|Drama|History ... 302.0 English USA TV-MA NaN NaN 939.0 8.6 16.00 31000
3084 Color NaN 3.0 24.0 NaN 44.0 Johnny Flynn 381.0 NaN Comedy ... 18.0 English UK NaN NaN NaN 102.0 7.9 NaN 0
3174 Color NaN 68.0 40.0 NaN 476.0 T.J. Thyne 1000.0 NaN Comedy|Crime|Drama|Mystery|Romance ... 173.0 English USA TV-14 NaN NaN 722.0 7.9 1.78 0
3207 Color NaN 53.0 55.0 NaN 2.0 Olaf Lubaszenko 20.0 447093.0 Drama ... 37.0 Polish Poland TV-MA NaN NaN 3.0 9.1 1.33 0
3303 Color NaN 1.0 30.0 NaN 140.0 Katherine Kelly Lang 177.0 NaN Drama|Romance ... 54.0 English USA TV-14 NaN NaN 170.0 3.5 1.78 748
3329 Color NaN 47.0 23.0 NaN 265.0 James Arnold Taylor 668.0 NaN Action|Adventure|Animation|Drama|Fantasy|Sci-Fi ... 73.0 English USA TV-PG NaN NaN 296.0 7.9 2.35 2000
3376 Color NaN 9.0 60.0 NaN 460.0 Philip Winchester 666.0 NaN Action|Crime|Drama|Thriller ... 25.0 English USA TV-14 NaN NaN 579.0 7.1 16.00 0
3397 Color NaN 18.0 60.0 NaN 648.0 Joseph Gilgun 3000.0 NaN Adventure|Drama|Fantasy|Mystery ... 85.0 English NaN TV-MA NaN NaN 788.0 8.3 16.00 18000
3419 Color NaN 9.0 142.0 NaN 427.0 Jack O'Connell 27000.0 NaN Drama|Romance ... 33.0 English UK NaN NaN NaN 698.0 7.7 NaN 0
3481 Color NaN 54.0 53.0 NaN 1000.0 Adam Goldberg 4000.0 NaN Crime|Drama|Thriller ... 173.0 English USA TV-MA NaN NaN 1000.0 9.0 1.78 61000
3490 Color NaN 96.0 44.0 NaN 970.0 Alyson Hannigan 4000.0 NaN Action|Drama|Fantasy|Romance ... 665.0 English USA TV-14 2300000.0 NaN 3000.0 8.2 1.33 6000
3507 Color NaN 27.0 60.0 NaN 346.0 Xander Berkeley 787.0 NaN Action|Crime|Drama|Thriller ... 83.0 English USA NaN NaN NaN 485.0 7.7 16.00 0
3509 Color NaN 11.0 60.0 NaN 652.0 Ashley Scott 10000.0 NaN Action|Drama|Mystery|Sci-Fi ... 160.0 English USA TV-14 NaN NaN 794.0 7.4 1.33 0
3579 Color NaN 10.0 55.0 NaN 5.0 Fortunato Cerlino 18.0 NaN Crime|Drama|Thriller ... 25.0 Italian Italy TV-MA NaN NaN 9.0 8.7 1.85 0
3604 Color NaN 10.0 NaN NaN 502.0 Tuppence Middleton 1000.0 NaN Drama|History|Romance|War ... 44.0 English UK TV-14 NaN NaN 888.0 8.2 16.00 11000
3623 Color NaN 47.0 44.0 NaN 724.0 Jason Dohring 937.0 NaN Crime|Drama|Mystery ... 315.0 English USA TV-14 NaN NaN 828.0 8.4 1.78 0
3650 Color NaN 10.0 240.0 NaN 334.0 Blake Ritson 805.0 NaN Comedy|Drama|Romance ... 50.0 English UK NaN NaN NaN 432.0 8.2 1.78 0
3661 Color NaN 15.0 25.0 NaN 220.0 Belinda Stewart-Wilson 229.0 NaN Comedy|Romance ... 42.0 English UK TV-14 NaN NaN 223.0 8.4 16.00 0
3800 Color NaN 9.0 60.0 NaN 904.0 Dylan Minnette 1000.0 NaN Drama|Fantasy ... 47.0 English USA TV-MA NaN NaN 1000.0 7.5 1.78 634
3807 Color NaN 38.0 44.0 NaN 816.0 Dulé Hill 1000.0 NaN Comedy|Crime|Mystery ... 127.0 English USA TV-PG NaN NaN 922.0 8.4 1.78 0
3847 Color NaN 5.0 43.0 NaN 298.0 Indiana Evans 562.0 NaN Crime|Drama ... 27.0 English USA NaN NaN NaN 560.0 7.7 16.00 2000
3871 Color NaN 2.0 199.0 NaN 505.0 Jonathan Crombie 662.0 NaN Drama|Family ... 74.0 English Canada TV-G NaN NaN 517.0 8.4 1.33 0
3881 Color NaN 8.0 60.0 NaN 551.0 Daniella Alonso 1000.0 NaN Crime|Drama ... 23.0 English USA NaN NaN NaN 557.0 8.1 16.00 0
3900 Color NaN 9.0 60.0 NaN 904.0 Dylan Minnette 1000.0 NaN Drama|Fantasy ... 47.0 English USA TV-MA NaN NaN 1000.0 7.5 1.78 634
3951 Color NaN 5.0 23.0 NaN 218.0 Tiffany Thornton 254.0 NaN Comedy|Family|Romance ... 47.0 English USA TV-G NaN NaN 248.0 6.1 1.33 325
3982 Color NaN 55.0 25.0 NaN 395.0 Jamie Farr 443.0 NaN Comedy|Drama|War ... 147.0 English USA TV-PG NaN NaN 421.0 8.4 4.00 0
3989 Color NaN 22.0 42.0 NaN 496.0 Gabourey Sidibe 912.0 NaN Drama|Music ... 92.0 English USA TV-14 NaN NaN 906.0 7.8 16.00 14000

83 rows × 28 columns

In [21]:
df.movie_title[0]
Out[21]:
'Avatar\xa0'
In [22]:
df['movie_title'] = df.movie_title.str.strip()
In [23]:
df.movie_title[0]
Out[23]:
'Avatar'
In [24]:
for column in df.columns:
    if df[column].dtype == 'object':
        df[column] = df[column].str.strip()
In [25]:
"\n   \n \t lasdkj ldaskdja \n \t   ".strip()
Out[25]:
'lasdkj ldaskdja'
In [26]:
df[df.movie_title == 'Titanic']
Out[26]:
color director_name num_critic_for_reviews duration director_facebook_likes actor_3_facebook_likes actor_2_name actor_1_facebook_likes gross genres ... num_user_for_reviews language country content_rating budget title_year actor_2_facebook_likes imdb_score aspect_ratio movie_facebook_likes
26 Color James Cameron 315.0 194.0 0.0 794.0 Kate Winslet 29000.0 658672302.0 Drama|Romance ... 2528.0 English USA PG-13 200000000.0 1997.0 14000.0 7.7 2.35 26000

1 rows × 28 columns

In [28]:
df[df.director_name == 'James Cameron']['duration'].mean()
Out[28]:
156.85714285714286
In [38]:
df[(df.director_name == 'James Cameron') & (df.duration > 160)]
Out[38]:
color director_name num_critic_for_reviews duration director_facebook_likes actor_3_facebook_likes actor_2_name actor_1_facebook_likes gross genres ... num_user_for_reviews language country content_rating budget title_year actor_2_facebook_likes imdb_score aspect_ratio movie_facebook_likes
0 Color James Cameron 723.0 178.0 0.0 855.0 Joel David Moore 1000.0 760505847.0 Action|Adventure|Fantasy|Sci-Fi ... 3054.0 English USA PG-13 237000000.0 2009.0 936.0 7.9 1.78 33000
26 Color James Cameron 315.0 194.0 0.0 794.0 Kate Winslet 29000.0 658672302.0 Drama|Romance ... 2528.0 English USA PG-13 200000000.0 1997.0 14000.0 7.7 2.35 26000
606 Color James Cameron 82.0 171.0 0.0 638.0 Todd Graff 2000.0 54222000.0 Adventure|Drama|Sci-Fi|Thriller ... 380.0 English USA PG-13 69500000.0 1989.0 650.0 7.6 2.35 0

3 rows × 28 columns

In [37]:
(df.
 query('director_name == "James Cameron" and duration > 160')
 [['movie_title', 'country']])
Out[37]:
movie_title country
0 Avatar USA
26 Titanic USA
606 The Abyss USA
In [43]:
df[df.director_name.str.contains('Tarkovsk', na=False)]
Out[43]:
color director_name num_critic_for_reviews duration director_facebook_likes actor_3_facebook_likes actor_2_name actor_1_facebook_likes gross genres ... num_user_for_reviews language country content_rating budget title_year actor_2_facebook_likes imdb_score aspect_ratio movie_facebook_likes
1061 Black and White Andrei Tarkovsky 144.0 115.0 0.0 12.0 Anatoliy Solonitsyn 29.0 NaN Drama|Mystery|Sci-Fi ... 236.0 Russian Soviet Union PG 1000000.0 1972.0 29.0 8.1 2.35 0

1 rows × 28 columns

In [51]:
%matplotlib inline
import matplotlib.pyplot as plt
In [ ]:
df.color.value_counts()
In [55]:
plt.figure(figsize=(6, 6))
df['color'].value_counts(dropna=False).plot.pie()
Out[55]:
<matplotlib.axes._subplots.AxesSubplot at 0x1e6ca04df28>
In [70]:
len(df.drop_duplicates())
Out[70]:
3969
In [ ]:
df.sort_values('imdb_score', ascending=False)['movie_title']
In [63]:
df.groupby('country').mean()['imdb_score'].sort_values(asc
                                                       ending=False)
Out[63]:
country
Poland            8.533333
Libya             8.400000
Soviet Union      8.100000
West Germany      7.900000
New Zealand       7.390909
Iceland           7.333333
Sweden            7.325000
Brazil            7.233333
Panama            7.200000
Finland           7.200000
Taiwan            7.150000
Italy             7.018750
Norway            7.000000
Denmark           6.983333
Czech Republic    6.966667
Japan             6.957895
Netherlands       6.925000
Chile             6.900000
India             6.880000
Ireland           6.862500
Spain             6.811111
UK                6.801902
Mexico            6.730000
Greece            6.700000
Hong Kong         6.687500
France            6.687402
China             6.561538
Iran              6.500000
Hungary           6.450000
Australia         6.448889
Thailand          6.400000
USA               6.376252
Official site     6.300000
Germany           6.256627
Canada            6.220225
South Africa      6.133333
Bulgaria          6.100000
South Korea       6.092308
Russia            6.081818
Slovakia          6.000000
Turkey            6.000000
Switzerland       5.950000
Georgia           5.600000
Nigeria           5.600000
Cambodia          5.600000
Belgium           5.600000
Peru              5.400000
Romania           4.900000
Aruba             4.800000
New Line          4.400000
Bahamas           4.400000
Name: imdb_score, dtype: float64
In [72]:
len(df)
Out[72]:
4000
In [73]:
df_cleaned = df.drop_duplicates()
In [75]:
len(df_cleaned)
Out[75]:
3969
In [79]:
df.drop_duplicates(inplace=True)
In [78]:
len(df)
Out[78]:
3969
In [85]:
df.query('country == "Soviet Union"')['movie_title']
Out[85]:
1061    Solaris
Name: movie_title, dtype: object
In [84]:
(df.
 groupby('country')['imdb_score'].
 agg(['mean', 'count']).
 sort_values('mean', ascending=False))
Out[84]:
mean count
country
Poland 8.533333 3
Libya 8.400000 1
Soviet Union 8.100000 1
West Germany 7.900000 2
New Zealand 7.390909 11
Iceland 7.333333 3
Sweden 7.325000 4
Brazil 7.233333 3
Panama 7.200000 1
Finland 7.200000 1
Taiwan 7.150000 2
Italy 7.018750 16
Norway 7.000000 4
Denmark 6.983333 6
Czech Republic 6.966667 3
Netherlands 6.925000 4
Chile 6.900000 1
Japan 6.888889 18
India 6.880000 20
Ireland 6.862500 8
Spain 6.811111 27
UK 6.798634 366
Mexico 6.730000 10
Greece 6.700000 1
Hong Kong 6.687500 16
France 6.687402 127
China 6.508000 25
Iran 6.500000 1
Hungary 6.450000 2
Australia 6.448889 45
Thailand 6.400000 4
USA 6.375988 3011
Official site 6.300000 1
Germany 6.236585 82
Canada 6.219318 88
South Africa 6.133333 6
Bulgaria 6.100000 1
South Korea 6.092308 13
Russia 6.081818 11
Turkey 6.000000 1
Slovakia 6.000000 1
Switzerland 5.950000 2
Cambodia 5.600000 1
Georgia 5.600000 1
Belgium 5.600000 4
Nigeria 5.600000 1
Peru 5.400000 1
Romania 4.900000 1
Aruba 4.800000 1
Bahamas 4.400000 1
New Line 4.400000 1
In [88]:
df.country.value_counts()[:10].plot.bar()
Out[88]:
<matplotlib.axes._subplots.AxesSubplot at 0x1e6cb387be0>
In [96]:
(df
 .groupby('country')
 ['imdb_score']
 .agg(['mean', 'count']))
Out[96]:
mean count
country
Aruba 4.800000 1
Australia 6.448889 45
Bahamas 4.400000 1
Belgium 5.600000 4
Brazil 7.233333 3
Bulgaria 6.100000 1
Cambodia 5.600000 1
Canada 6.219318 88
Chile 6.900000 1
China 6.508000 25
Czech Republic 6.966667 3
Denmark 6.983333 6
Finland 7.200000 1
France 6.687402 127
Georgia 5.600000 1
Germany 6.236585 82
Greece 6.700000 1
Hong Kong 6.687500 16
Hungary 6.450000 2
Iceland 7.333333 3
India 6.880000 20
Iran 6.500000 1
Ireland 6.862500 8
Italy 7.018750 16
Japan 6.888889 18
Libya 8.400000 1
Mexico 6.730000 10
Netherlands 6.925000 4
New Line 4.400000 1
New Zealand 7.390909 11
Nigeria 5.600000 1
Norway 7.000000 4
Official site 6.300000 1
Panama 7.200000 1
Peru 5.400000 1
Poland 8.533333 3
Romania 4.900000 1
Russia 6.081818 11
Slovakia 6.000000 1
South Africa 6.133333 6
South Korea 6.092308 13
Soviet Union 8.100000 1
Spain 6.811111 27
Sweden 7.325000 4
Switzerland 5.950000 2
Taiwan 7.150000 2
Thailand 6.400000 4
Turkey 6.000000 1
UK 6.798634 366
USA 6.375988 3011
West Germany 7.900000 2
In [98]:
(df
 .groupby('country')
 ['imdb_score']
 .agg(['mean', 'count', 'std'])
 .query('count > 20 and std < 1')
 .sort_values('mean', ascending=False))
Out[98]:
mean count std
country
Spain 6.811111 27 0.790002
UK 6.798634 366 0.936811
Australia 6.448889 45 0.872984
In [117]:
df. \
groupby('country'). \
mean()
Out[117]:
num_critic_for_reviews duration director_facebook_likes actor_3_facebook_likes actor_1_facebook_likes gross num_voted_users cast_total_facebook_likes facenumber_in_poster num_user_for_reviews budget title_year actor_2_facebook_likes imdb_score aspect_ratio movie_facebook_likes profitable profit
country
Aruba 67.000000 91.000000 85.000000 105.000000 635.000000 1.007614e+07 11512.000000 1352.000000 0.000000 141.000000 3.500000e+07 1998.000000 316.000000 4.800000 2.350000 471.000000 0.000000 -2.492386e+07
Australia 145.844444 105.622222 189.244444 322.333333 5281.733333 4.333892e+07 72451.000000 7928.333333 1.222222 261.222222 3.584418e+07 2003.377778 1837.666667 6.448889 2.162000 12742.866667 0.355556 5.340987e+06
Bahamas 64.000000 94.000000 261.000000 179.000000 12000.000000 NaN 6964.000000 12676.000000 1.000000 47.000000 5.000000e+06 2014.000000 316.000000 4.400000 2.350000 0.000000 0.000000 NaN
Belgium 61.750000 103.000000 321.250000 274.250000 8141.750000 6.805665e+05 15629.500000 9973.000000 1.750000 42.750000 2.450000e+07 2013.500000 1016.250000 5.600000 2.350000 3838.500000 0.000000 -2.381943e+07
Brazil 84.666667 112.000000 106.333333 16.000000 429.666667 1.416100e+04 28677.333333 505.666667 1.333333 50.666667 5.333333e+06 2008.666667 45.666667 7.233333 2.183333 3887.666667 0.000000 -3.985839e+06
Bulgaria 136.000000 109.000000 19.000000 537.000000 1000.000000 NaN 41638.000000 2829.000000 0.000000 134.000000 7.000000e+06 2014.000000 889.000000 6.100000 2.350000 13000.000000 0.000000 NaN
Cambodia 17.000000 100.000000 5.000000 46.000000 192.000000 NaN 1933.000000 527.000000 0.000000 13.000000 NaN 2014.000000 134.000000 5.600000 2.350000 708.000000 0.000000 NaN
Canada 107.218391 101.875000 130.164706 258.602273 3129.318182 2.659560e+07 39875.784091 4294.022727 1.090909 206.172414 2.063136e+07 2004.047619 500.772727 6.219318 2.096429 3035.863636 0.227273 3.936365e+06
Chile 120.000000 127.000000 36.000000 399.000000 562.000000 1.218864e+07 21098.000000 2230.000000 7.000000 56.000000 2.600000e+07 2015.000000 499.000000 6.900000 2.350000 0.000000 0.000000 -1.381136e+07
China 100.440000 118.600000 202.680000 138.840000 2001.720000 1.675405e+07 32093.600000 2927.640000 1.320000 122.080000 6.910185e+07 2009.400000 656.120000 6.508000 2.328261 2579.440000 0.080000 -6.672914e+07
Czech Republic 134.000000 117.000000 61.333333 269.666667 9276.666667 1.294772e+06 25580.333333 10100.333333 1.333333 176.000000 4.881667e+07 2006.666667 319.333333 6.966667 2.183333 6333.333333 0.000000 -4.752189e+07
Denmark 221.000000 110.833333 1513.833333 475.333333 8089.333333 1.734539e+06 55988.833333 10323.666667 0.833333 323.333333 1.986667e+07 2005.666667 1260.166667 6.983333 2.266667 13593.500000 0.000000 -1.813213e+07
Finland 205.000000 93.000000 592.000000 36.000000 232.000000 6.117090e+05 15267.000000 391.000000 1.000000 41.000000 3.850000e+06 2011.000000 67.000000 7.200000 1.850000 0.000000 0.000000 -3.238291e+06
France 140.519685 111.889764 373.656000 526.825397 5013.511811 1.787681e+07 59801.692913 7138.708661 1.173228 204.724409 3.638139e+07 2005.704000 1117.125984 6.687402 2.309174 5561.960630 0.133858 -1.939491e+07
Georgia 74.000000 113.000000 212.000000 18.000000 567.000000 1.714900e+04 12128.000000 708.000000 9.000000 61.000000 2.000000e+07 2011.000000 111.000000 5.600000 2.350000 0.000000 0.000000 -1.998285e+07
Germany 143.536585 110.963415 304.304878 576.780488 6844.756098 3.126474e+07 78016.463415 9800.451220 1.390244 312.878049 3.725899e+07 2003.829268 1624.731707 6.236585 2.172561 5157.097561 0.329268 -7.887524e+06
Greece 263.000000 112.000000 337.000000 201.000000 4000.000000 3.707794e+06 31359.000000 5671.000000 0.000000 156.000000 1.800000e+07 2016.000000 820.000000 6.700000 2.350000 0.000000 0.000000 -1.429221e+07
Hong Kong 141.500000 101.187500 46.562500 181.500000 3307.312500 1.204466e+07 31135.062500 4733.000000 1.187500 116.125000 2.650714e+07 2006.187500 1026.187500 6.687500 2.318750 1766.500000 0.125000 -1.611297e+07
Hungary 83.500000 121.500000 30.000000 175.000000 1504.500000 1.168760e+07 19277.000000 2134.000000 1.500000 44.500000 1.260000e+09 2008.000000 388.500000 6.450000 2.100000 3303.500000 0.500000 -1.248312e+09
Iceland 47.666667 230.333333 16.500000 40.000000 109.000000 1.589700e+04 2602.333333 228.666667 0.666667 13.333333 6.900000e+06 2011.000000 45.666667 7.333333 6.900000 333.333333 0.000000 -6.884103e+06
India 32.850000 145.263158 66.400000 199.000000 2011.500000 2.109720e+06 20846.650000 2967.800000 1.300000 120.850000 1.676271e+08 2009.650000 439.100000 6.880000 2.304545 3792.500000 0.000000 -2.103829e+08
Iran 2.000000 127.000000 109.000000 324.000000 16000.000000 1.000000e+06 524.000000 17250.000000 0.000000 8.000000 1.400000e+07 1978.000000 469.000000 6.500000 2.350000 76.000000 0.000000 -1.300000e+07
Ireland 202.250000 102.625000 245.625000 298.625000 2949.875000 7.596179e+06 49071.875000 5379.375000 2.250000 175.500000 1.650000e+07 2008.500000 1896.500000 6.862500 2.225000 11295.625000 0.125000 -6.676757e+06
Italy 84.375000 123.937500 273.266667 187.062500 2368.125000 3.521336e+06 40539.187500 3196.375000 2.500000 124.500000 1.936154e+07 1997.066667 316.875000 7.018750 2.070667 5360.312500 0.062500 -1.381616e+07
Japan 122.000000 111.555556 1441.470588 220.058824 3275.500000 4.000085e+07 76818.555556 4122.722222 0.222222 219.888889 3.745575e+08 2002.647059 310.235294 6.888889 2.026471 6388.611111 0.111111 -3.850649e+08
Libya 24.000000 156.000000 278.000000 249.000000 695.000000 NaN 9852.000000 1573.000000 0.000000 55.000000 3.500000e+07 1980.000000 279.000000 8.400000 2.350000 0.000000 0.000000 NaN
Mexico 65.700000 115.100000 51.200000 175.100000 441.800000 9.616342e+06 20704.800000 1261.400000 1.100000 80.000000 8.064847e+06 2011.600000 239.200000 6.730000 2.207143 5921.100000 0.200000 3.103210e+06
Netherlands 113.750000 114.750000 216.250000 77.250000 658.000000 1.884888e+06 27030.750000 1025.000000 1.000000 108.000000 1.185000e+07 1999.000000 183.000000 6.925000 2.225000 172.250000 0.000000 -7.948445e+06
New Line 62.000000 104.000000 23.000000 591.000000 752.000000 6.712451e+06 4102.000000 3133.000000 0.000000 89.000000 9.000000e+07 2001.000000 631.000000 4.400000 1.850000 53.000000 0.000000 -8.328755e+07
New Zealand 201.090909 136.000000 43.363636 250.818182 5652.636364 1.079822e+08 222322.454545 7137.909091 0.727273 1122.090909 8.515000e+07 2004.090909 883.454545 7.390909 2.259091 8919.545455 0.545455 2.609120e+07
Nigeria 6.000000 95.000000 20.000000 36.000000 262.000000 NaN 385.000000 409.000000 3.000000 4.000000 7.500000e+06 2012.000000 65.000000 5.600000 NaN 389.000000 0.000000 NaN
Norway 99.500000 91.500000 175.000000 11.250000 343.500000 1.196752e+06 25848.750000 400.000000 1.750000 57.000000 3.245000e+07 2005.000000 28.750000 7.000000 6.913333 9819.500000 0.000000 -2.910325e+07
Official site 135.000000 117.000000 19.000000 461.000000 3000.000000 2.021892e+07 14814.000000 4204.000000 4.000000 114.000000 1.500000e+07 2010.000000 646.000000 6.300000 2.350000 0.000000 1.000000 5.218921e+06
Panama NaN 105.000000 23.000000 897.000000 22000.000000 NaN 178.000000 27425.000000 0.000000 1.000000 2.000000e+07 2016.000000 2000.000000 7.200000 NaN 0.000000 0.000000 NaN
Peru 38.000000 110.000000 49.000000 20.000000 13000.000000 5.736258e+07 50148.000000 13331.000000 2.000000 116.000000 4.500000e+07 1994.000000 279.000000 5.400000 1.850000 0.000000 1.000000 1.236258e+07
Poland 35.666667 50.333333 NaN 2.000000 36.666667 4.470930e+05 8484.000000 41.333333 1.333333 25.333333 NaN NaN 2.666667 8.533333 1.330000 21.333333 0.000000 NaN
Romania 148.000000 88.000000 64.000000 363.000000 1000.000000 1.701619e+07 28632.000000 2567.000000 0.000000 214.000000 1.200000e+07 2004.000000 857.000000 4.900000 1.850000 0.000000 1.000000 5.016190e+06
Russia 86.727273 109.545455 80.181818 26.090909 416.000000 3.739952e+06 17325.000000 564.181818 0.909091 82.272727 1.727000e+07 2010.272727 96.636364 6.081818 1.901667 4576.000000 0.000000 -9.914074e+06
Slovakia 25.000000 141.000000 15.000000 576.000000 735.000000 NaN 3425.000000 2528.000000 1.000000 32.000000 NaN 2008.000000 627.000000 6.000000 2.350000 0.000000 0.000000 NaN
South Africa 128.000000 95.833333 162.833333 1972.000000 6848.833333 8.036764e+07 104755.000000 14303.666667 1.000000 262.500000 1.900000e+07 2012.166667 4169.000000 6.133333 1.993333 13678.333333 0.333333 5.286764e+07
South Korea 118.692308 120.923077 140.307692 207.615385 2416.538462 2.237449e+06 30265.461538 3253.076923 1.230769 129.846154 1.509502e+09 2007.076923 321.384615 6.092308 2.207273 5452.846154 0.000000 -2.064894e+09
Soviet Union 144.000000 115.000000 0.000000 12.000000 29.000000 NaN 54057.000000 95.000000 0.000000 236.000000 1.000000e+06 1972.000000 29.000000 8.100000 2.350000 0.000000 0.000000 NaN
Spain 167.148148 109.814815 1015.555556 267.222222 6022.074074 9.925527e+06 79786.444444 7428.148148 1.000000 199.407407 4.986739e+07 2007.296296 817.000000 6.811111 2.188889 9317.000000 0.148148 -4.566598e+07
Sweden 78.666667 147.750000 86.750000 115.000000 509.000000 1.852625e+05 16820.750000 1187.000000 0.750000 95.000000 2.500000e+07 2006.333333 260.000000 7.325000 2.183333 5.500000 0.000000 NaN
Switzerland 98.000000 107.500000 30.500000 263.500000 496.000000 NaN 14095.000000 1433.500000 1.500000 62.000000 1.725000e+07 2011.500000 456.000000 5.950000 2.350000 4000.000000 0.000000 NaN
Taiwan 246.000000 112.500000 70.500000 30.500000 551.500000 6.434068e+07 113583.500000 650.500000 2.000000 864.000000 1.500000e+07 2007.500000 62.000000 7.150000 1.860000 0.000000 0.500000 4.934068e+07
Thailand 79.750000 154.250000 18.250000 174.500000 712.250000 3.115498e+06 15418.500000 1293.250000 2.000000 92.750000 2.272500e+08 2007.000000 263.500000 6.400000 2.100000 31.000000 0.000000 -2.241345e+08
Turkey 16.000000 122.000000 11.000000 173.000000 205.000000 NaN 14486.000000 808.000000 3.000000 159.000000 8.300000e+06 2006.000000 197.000000 6.000000 1.850000 467.000000 0.000000 NaN
UK 165.457534 112.449036 314.025352 533.526027 6413.784153 3.680645e+07 86899.857923 9061.920765 1.333333 307.158904 3.226442e+07 2001.878873 1436.371585 6.798634 2.324602 9299.180328 0.336066 4.835017e+06
USA 159.025965 109.191758 929.757955 840.003657 8105.035204 6.164277e+07 107147.681833 12218.585520 1.446738 327.490027 4.278304e+07 2002.781991 2170.686607 6.375988 2.201786 9139.961142 0.508137 1.830874e+07
West Germany 97.500000 193.500000 249.000000 144.500000 442.500000 1.143313e+07 133880.000000 1014.500000 0.500000 355.000000 2.050000e+07 1982.500000 166.500000 7.900000 2.100000 16000.000000 0.000000 -2.566866e+06
In [108]:
df['profitable'] = (df.gross > df.budget).astype(int)
In [110]:
df['profit'] = df.gross - df.budget
In [112]:
df.new_column = df.gross + df.budget
C:\ProgramData\Anaconda3\lib\site-packages\ipykernel_launcher.py:1: UserWarning: Pandas doesn't allow columns to be created via a new attribute name - see https://pandas.pydata.org/pandas-docs/stable/indexing.html#attribute-access
  """Entry point for launching an IPython kernel.
In [113]:
 
Out[113]:
color director_name num_critic_for_reviews duration director_facebook_likes actor_3_facebook_likes actor_2_name actor_1_facebook_likes gross genres ... country content_rating budget title_year actor_2_facebook_likes imdb_score aspect_ratio movie_facebook_likes profitable profit
0 Color James Cameron 723.0 178.0 0.0 855.0 Joel David Moore 1000.0 760505847.0 Action|Adventure|Fantasy|Sci-Fi ... USA PG-13 237000000.0 2009.0 936.0 7.9 1.78 33000 1 523505847.0
1 Color Gore Verbinski 302.0 169.0 563.0 1000.0 Orlando Bloom 40000.0 309404152.0 Action|Adventure|Fantasy ... USA PG-13 300000000.0 2007.0 5000.0 7.1 2.35 0 1 9404152.0
2 Color Sam Mendes 602.0 148.0 0.0 161.0 Rory Kinnear 11000.0 200074175.0 Action|Adventure|Thriller ... UK PG-13 245000000.0 2015.0 393.0 6.8 2.35 85000 0 -44925825.0
3 Color Christopher Nolan 813.0 164.0 22000.0 23000.0 Christian Bale 27000.0 448130642.0 Action|Thriller ... USA PG-13 250000000.0 2012.0 23000.0 8.5 2.35 164000 1 198130642.0
4 NaN Doug Walker NaN NaN 131.0 NaN Rob Walker 131.0 NaN Documentary ... NaN NaN NaN NaN 12.0 7.1 NaN 0 0 NaN
5 Color Andrew Stanton 462.0 132.0 475.0 530.0 Samantha Morton 640.0 73058679.0 Action|Adventure|Sci-Fi ... USA PG-13 263700000.0 2012.0 632.0 6.6 2.35 24000 0 -190641321.0
6 Color Sam Raimi 392.0 156.0 0.0 4000.0 James Franco 24000.0 336530303.0 Action|Adventure|Romance ... USA PG-13 258000000.0 2007.0 11000.0 6.2 2.35 0 1 78530303.0
7 Color Nathan Greno 324.0 100.0 15.0 284.0 Donna Murphy 799.0 200807262.0 Adventure|Animation|Comedy|Family|Fantasy|Musi... ... USA PG 260000000.0 2010.0 553.0 7.8 1.85 29000 0 -59192738.0
8 Color Joss Whedon 635.0 141.0 0.0 19000.0 Robert Downey Jr. 26000.0 458991599.0 Action|Adventure|Sci-Fi ... USA PG-13 250000000.0 2015.0 21000.0 7.5 2.35 118000 1 208991599.0
9 Color David Yates 375.0 153.0 282.0 10000.0 Daniel Radcliffe 25000.0 301956980.0 Adventure|Family|Fantasy|Mystery ... UK PG 250000000.0 2009.0 11000.0 7.5 2.35 10000 1 51956980.0
10 Color Zack Snyder 673.0 183.0 0.0 2000.0 Lauren Cohan 15000.0 330249062.0 Action|Adventure|Sci-Fi ... USA PG-13 250000000.0 2016.0 4000.0 6.9 2.35 197000 1 80249062.0
11 Color Bryan Singer 434.0 169.0 0.0 903.0 Marlon Brando 18000.0 200069408.0 Action|Adventure|Sci-Fi ... USA PG-13 209000000.0 2006.0 10000.0 6.1 2.35 0 0 -8930592.0
12 Color Marc Forster 403.0 106.0 395.0 393.0 Mathieu Amalric 451.0 168368427.0 Action|Adventure ... UK PG-13 200000000.0 2008.0 412.0 6.7 2.35 0 0 -31631573.0
13 Color Gore Verbinski 313.0 151.0 563.0 1000.0 Orlando Bloom 40000.0 423032628.0 Action|Adventure|Fantasy ... USA PG-13 225000000.0 2006.0 5000.0 7.3 2.35 5000 1 198032628.0
14 Color Gore Verbinski 450.0 150.0 563.0 1000.0 Ruth Wilson 40000.0 89289910.0 Action|Adventure|Western ... USA PG-13 215000000.0 2013.0 2000.0 6.5 2.35 48000 0 -125710090.0
15 Color Zack Snyder 733.0 143.0 0.0 748.0 Christopher Meloni 15000.0 291021565.0 Action|Adventure|Fantasy|Sci-Fi ... USA PG-13 225000000.0 2013.0 3000.0 7.2 2.35 118000 1 66021565.0
16 Color Andrew Adamson 258.0 150.0 80.0 201.0 Pierfrancesco Favino 22000.0 141614023.0 Action|Adventure|Family|Fantasy ... USA PG 225000000.0 2008.0 216.0 6.6 2.35 0 0 -83385977.0
17 Color Joss Whedon 703.0 173.0 0.0 19000.0 Robert Downey Jr. 26000.0 623279547.0 Action|Adventure|Sci-Fi ... USA PG-13 220000000.0 2012.0 21000.0 8.1 1.85 123000 1 403279547.0
18 Color Rob Marshall 448.0 136.0 252.0 1000.0 Sam Claflin 40000.0 241063875.0 Action|Adventure|Fantasy ... USA PG-13 250000000.0 2011.0 11000.0 6.7 2.35 58000 0 -8936125.0
19 Color Barry Sonnenfeld 451.0 106.0 188.0 718.0 Michael Stuhlbarg 10000.0 179020854.0 Action|Adventure|Comedy|Family|Fantasy|Sci-Fi ... USA PG-13 225000000.0 2012.0 816.0 6.8 1.85 40000 0 -45979146.0
20 Color Peter Jackson 422.0 164.0 0.0 773.0 Adam Brown 5000.0 255108370.0 Adventure|Fantasy ... New Zealand PG-13 250000000.0 2014.0 972.0 7.5 2.35 65000 1 5108370.0
21 Color Marc Webb 599.0 153.0 464.0 963.0 Andrew Garfield 15000.0 262030663.0 Action|Adventure|Fantasy ... USA PG-13 230000000.0 2012.0 10000.0 7.0 2.35 56000 1 32030663.0
22 Color Ridley Scott 343.0 156.0 0.0 738.0 William Hurt 891.0 105219735.0 Action|Adventure|Drama|History ... USA PG-13 200000000.0 2010.0 882.0 6.7 2.35 17000 0 -94780265.0
23 Color Peter Jackson 509.0 186.0 0.0 773.0 Adam Brown 5000.0 258355354.0 Adventure|Fantasy ... USA PG-13 225000000.0 2013.0 972.0 7.9 2.35 83000 1 33355354.0
24 Color Chris Weitz 251.0 113.0 129.0 1000.0 Eva Green 16000.0 70083519.0 Adventure|Family|Fantasy ... USA PG-13 180000000.0 2007.0 6000.0 6.1 2.35 0 0 -109916481.0
25 Color Peter Jackson 446.0 201.0 0.0 84.0 Thomas Kretschmann 6000.0 218051260.0 Action|Adventure|Drama|Romance ... New Zealand PG-13 207000000.0 2005.0 919.0 7.2 2.35 0 1 11051260.0
26 Color James Cameron 315.0 194.0 0.0 794.0 Kate Winslet 29000.0 658672302.0 Drama|Romance ... USA PG-13 200000000.0 1997.0 14000.0 7.7 2.35 26000 1 458672302.0
27 Color Anthony Russo 516.0 147.0 94.0 11000.0 Scarlett Johansson 21000.0 407197282.0 Action|Adventure|Sci-Fi ... USA PG-13 250000000.0 2016.0 19000.0 8.2 2.35 72000 1 157197282.0
28 Color Peter Berg 377.0 131.0 532.0 627.0 Alexander Skarsgård 14000.0 65173160.0 Action|Adventure|Sci-Fi|Thriller ... USA PG-13 209000000.0 2012.0 10000.0 5.9 2.35 44000 0 -143826840.0
29 Color Colin Trevorrow 644.0 124.0 365.0 1000.0 Judy Greer 3000.0 652177271.0 Action|Adventure|Sci-Fi|Thriller ... USA PG-13 150000000.0 2015.0 2000.0 7.0 2.00 150000 1 502177271.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
3970 Color Victor Fleming 157.0 226.0 149.0 248.0 George Reeves 503.0 198655278.0 Drama|History|Romance|War ... USA G 3977000.0 1939.0 384.0 8.2 1.37 16000 1 194678278.0
3971 Color Richard Raymond 31.0 104.0 10.0 196.0 Reece Ritchie 507.0 143653.0 Biography|Drama ... UK PG-13 4000000.0 2014.0 236.0 6.0 2.35 0 0 -3856347.0
3972 Color Sam Peckinpah 53.0 152.0 541.0 288.0 Slim Pickens 773.0 14873.0 Adventure|War|Western ... USA Approved 3800000.0 1965.0 575.0 6.8 2.35 251 0 -3785127.0
3973 Color Alan Jacobs 1.0 92.0 2.0 601.0 Laz Alonso 881.0 NaN Drama ... USA R NaN 2009.0 826.0 7.3 1.85 237 0 NaN
3974 Color George Sidney 21.0 107.0 24.0 83.0 Howard Keel 277.0 8000000.0 Biography|Comedy|Musical|Romance|Western ... USA Passed 3768785.0 1950.0 244.0 7.0 1.37 456 1 4231215.0
3975 Color Christopher Morris 246.0 97.0 69.0 106.0 Riz Ahmed 414.0 304137.0 Comedy|Crime|Drama ... UK R NaN 2010.0 365.0 7.3 1.85 17000 0 NaN
3976 Color Andrucha Waddington 42.0 115.0 6.0 24.0 Seu Jorge 119.0 NaN Drama ... Brazil R 8000000.0 2005.0 69.0 7.5 2.35 385 0 NaN
3977 Color Peter Stebbings 78.0 95.0 89.0 388.0 Tony Nappo 963.0 37606.0 Comedy|Crime|Drama ... Canada R 3500000.0 2009.0 654.0 6.8 2.35 0 0 -3462394.0
3978 Color Vincente Minnelli 41.0 102.0 136.0 48.0 Reginald Owen 89.0 2956000.0 Adventure|Comedy|Musical|Romance ... USA Approved 3700000.0 1948.0 78.0 7.1 1.37 359 0 -744000.0
3979 Color Dagur Kári 61.0 99.0 29.0 64.0 Susan Blommaert 117.0 19959.0 Drama ... Iceland R 3800000.0 2009.0 64.0 6.9 2.35 0 0 -3780041.0
3980 Color Nicholas Hytner 112.0 109.0 13.0 480.0 Russell Tovey 3000.0 2706659.0 Comedy|Drama ... UK R 2000000.0 2006.0 796.0 6.9 1.85 0 1 706659.0
3981 Color Jaume Collet-Serra 349.0 113.0 174.0 767.0 Frank Langella 14000.0 61094903.0 Action|Mystery|Thriller ... UK PG-13 30000000.0 2011.0 902.0 6.9 2.35 29000 1 31094903.0
3982 Color NaN 55.0 25.0 NaN 395.0 Jamie Farr 443.0 NaN Comedy|Drama|War ... USA TV-PG NaN NaN 421.0 8.4 4.00 0 0 NaN
3983 Black and White John Schlesinger 88.0 113.0 154.0 77.0 Barnard Hughes 183.0 NaN Drama ... USA X 3600000.0 1969.0 89.0 7.9 1.85 0 0 NaN
3984 Color Peter Cattaneo 122.0 91.0 11.0 121.0 Mark Addy 1000.0 45857453.0 Comedy|Drama|Music ... UK R 3500000.0 1997.0 891.0 7.2 1.85 0 1 42357453.0
3985 Color Jim Abrahams 134.0 88.0 104.0 318.0 Lloyd Bridges 628.0 83400000.0 Comedy ... USA PG 3500000.0 1980.0 575.0 7.8 1.85 16000 1 79900000.0
3986 Color Kevin Carraway 11.0 88.0 2.0 6.0 Max Ryan 2000.0 NaN Action|Thriller ... USA R 5000000.0 2015.0 872.0 3.5 1.33 358 0 NaN
3987 Black and White F. Gary Gray 20.0 97.0 473.0 624.0 John Witherspoon 826.0 27900000.0 Comedy|Drama ... USA R 3500000.0 1995.0 723.0 7.3 1.85 8000 1 24400000.0
3988 Color Albert Hughes 28.0 97.0 117.0 556.0 Larenz Tate 851.0 27900000.0 Crime|Drama|Thriller ... USA R 3500000.0 1993.0 582.0 7.5 1.85 0 1 24400000.0
3989 Color NaN 22.0 42.0 NaN 496.0 Gabourey Sidibe 912.0 NaN Drama|Music ... USA TV-14 NaN NaN 906.0 7.8 16.00 14000 0 NaN
3990 Color Michael Gornick 95.0 85.0 10.0 178.0 Holt McCallany 3000.0 14000000.0 Comedy|Fantasy|Horror|Thriller ... USA R 3500000.0 1987.0 273.0 6.0 1.85 1000 1 10500000.0
3991 Color Sam Peckinpah 31.0 121.0 541.0 242.0 Jason Robards 575.0 NaN Comedy|Drama|Romance|Western ... USA R 3716946.0 1970.0 372.0 7.3 1.85 357 0 NaN
3992 Black and White Richard Brooks 79.0 134.0 174.0 188.0 Robert Blake 255.0 NaN Biography|Crime|Drama|History ... USA Approved 3500000.0 1967.0 220.0 8.0 2.35 0 0 NaN
3993 Color Fred Zinnemann 25.0 149.0 160.0 75.0 Peter Finch 157.0 NaN Drama ... USA Not Rated 3500000.0 1959.0 139.0 7.6 1.78 930 0 NaN
3994 Color Jack Smight 22.0 121.0 22.0 367.0 Robert Wagner 606.0 NaN Action|Crime|Drama|Mystery|Thriller ... USA Not Rated 3500000.0 1966.0 481.0 7.0 2.35 470 0 NaN
3995 Color Alfred Hitchcock 110.0 116.0 13000.0 118.0 Jean Marsh 195.0 NaN Thriller ... UK R 2000000.0 1972.0 146.0 7.5 1.85 0 0 NaN
3996 Color Robert Eggers 425.0 92.0 22.0 159.0 Kate Dickie 648.0 25138292.0 Horror|Mystery ... USA R 3500000.0 2015.0 191.0 6.8 1.66 43000 1 21638292.0
3997 Color Michael Martin 19.0 93.0 0.0 78.0 Master P 625.0 10305534.0 Comedy ... USA R 3500000.0 1998.0 118.0 3.9 1.85 280 1 6805534.0
3998 Color Edward Burns 43.0 96.0 0.0 111.0 Frank Vincent 385.0 9449219.0 Comedy|Drama|Romance ... USA R 3500000.0 1996.0 356.0 6.1 1.85 453 1 5949219.0
3999 Black and White Bill Condon 127.0 105.0 386.0 248.0 Lynn Redgrave 3000.0 6390032.0 Biography|Drama ... USA R 3500000.0 1998.0 258.0 7.5 2.35 0 1 2890032.0

3969 rows × 30 columns