NOTA IMDB E LUCRO

library(tidyverse)
# Load the IMDB dataset, then print a one-row table with the overall means of
# duration, budget, rating and profit, plus the total number of films.
imdb <- read_rds("../dados/imdb.rds")

imdb %>%
  summarise(
    m_duracao   = mean(duracao, na.rm = TRUE),
    m_orcamento = mean(orcamento, na.rm = TRUE),
    m_nota_meia = mean(nota_imdb, na.rm = TRUE),
    # Profit is revenue minus budget; rows where either is missing are ignored.
    m_lucro     = mean(receita - orcamento, na.rm = TRUE),
    num_films   = n()
  ) %>%
  knitr::kable(
    caption = "Meia das variables do estudio",
    digits = 2,
    format.args = list(big.mark = ",", scientific = FALSE)
  )
Meia das variables do estudio
m_duracao m_orcamento m_nota_meia m_lucro num_films
106.37 35,237,114 6.37 17,161,639 3,713
# Scatter plot of profit (x axis) against IMDB rating (y axis), with points
# coloured by whether the film made a profit ("SI") or not ("NO").
imdb %>%
  mutate(
    lucro  = receita - orcamento,
    lucrou = ifelse(lucro > 0, "SI", "NO")
  ) %>%
  ggplot() +
  geom_point(aes(x = lucro, y = nota_imdb, color = lucrou)) +
  # Break-even marker. Profit is on the x axis, so the reference is the
  # vertical line lucro = 0; a y = x line would compare a rating to a
  # currency amount, which has no meaning.
  geom_vline(xintercept = 0, color = "blue") +
  labs(
    title = "Gráfico de dispersão",
    subtitle = "Lucro vs Nota Imdb"
  )

# Scatter plot of budget (x axis) against profit (y axis). Points are coloured
# by the sign of the profit and a horizontal line marks break-even.
imdb %>%
  mutate(
    lucro  = receita - orcamento,
    lucrou = ifelse(lucro > 0, "SI", "NO")
  ) %>%
  ggplot() +
  geom_point(mapping = aes(x = orcamento, y = lucro, color = lucrou)) +
  geom_hline(yintercept = 0, color = "blue") +
  labs(title = "Gráfico de dispersão", subtitle = "Lucro vs Orcamento")

 # Per-rating summary: mean profit, mean duration, mean budget and film count
 # for each distinct IMDB rating, keeping the 50 highest ratings in
 # descending order. Stored as `nota1` so it can be reused further down.
 nota1 <- imdb %>%
   group_by(nota_imdb) %>%
   summarise(
     lucro_medio     = mean(receita - orcamento, na.rm = TRUE),
     duracao_meia    = mean(duracao, na.rm = TRUE),
     orcamento_medio = mean(orcamento, na.rm = TRUE),
     num_films       = n()
   ) %>%
   arrange(desc(nota_imdb)) %>%
   top_n(50, nota_imdb)

 # Render the summary as a formatted table.
 knitr::kable(
   nota1,
   caption = "LUCRO MEDIO POR NOTA IMDB",
   format.args = list(big.mark = ",", scientific = FALSE)
 )
LUCRO MEDIO POR NOTA IMDB
nota_imdb lucro_medio duracao_meia orcamento_medio num_films
9.3 3,341,469.0 142.00000 25,000,000 1
9.2 128,821,952.0 175.00000 6,000,000 1
9.1 NaN 90.00000 17,000,000 1
9.0 196,308,030.5 141.66667 99,000,000 3
8.9 152,338,810.3 162.75000 31,087,500 4
8.8 163,360,548.2 107.33333 74,000,000 6
8.7 186,846,842.0 105.66667 32,930,000 9
8.6 51,873,008.9 107.00000 31,113,000 12
8.5 98,526,189.5 129.61111 54,837,934 18
8.4 63,268,178.8 98.00000 26,942,308 17
8.3 77,955,748.7 122.82759 40,745,586 29
8.2 62,310,335.4 120.72000 41,293,478 25
8.1 89,883,063.2 119.56818 41,408,810 45
8.0 63,271,062.1 132.52000 30,189,362 50
7.9 64,981,687.6 123.02083 53,313,750 48
7.8 37,923,287.5 111.17391 37,757,620 69
7.7 44,638,976.9 115.83871 34,068,791 62
7.6 42,396,896.3 120.75281 34,392,993 89
7.5 30,993,385.4 106.32941 32,549,731 85
7.4 21,254,637.4 109.75532 25,931,335 94
7.3 34,075,561.5 108.69672 41,713,137 122
7.2 29,162,619.9 112.88636 35,397,813 133
7.1 14,654,894.1 106.78400 32,812,070 125
7.0 22,197,595.9 108.00000 37,206,352 128
6.9 23,340,037.0 111.61789 37,997,908 123
6.8 12,222,904.8 108.40000 31,548,102 135
6.7 13,048,874.7 109.16168 40,519,064 168
6.6 11,116,086.2 108.28846 40,070,354 156
6.5 14,288,025.4 106.73288 35,481,415 146
6.4 14,869,558.9 108.12414 42,117,949 145
6.3 4,950,227.4 104.95420 40,280,251 131
6.2 6,681,730.8 102.85496 33,368,742 131
6.1 10,601,720.2 104.93130 42,186,967 131
6.0 6,950,785.8 106.60000 39,477,326 95
5.9 3,721,166.4 101.77778 32,637,712 126
5.8 -446,488.8 102.68367 44,016,882 98
5.7 9,979,507.5 102.30000 30,475,116 90
5.6 -4,660,166.0 101.32584 37,441,131 89
5.5 5,223,751.5 100.37500 39,609,392 80
5.4 -3,564,249.4 98.90909 35,748,831 88
5.3 -6,985,906.2 97.95775 24,000,145 71
5.2 9,280,941.3 95.77193 30,035,717 57
5.1 713,842.1 97.05357 26,706,019 57
5.0 5,379,061.4 101.64706 24,623,387 34
4.9 437,100.5 97.45238 36,057,436 42
4.8 -1,719,612.2 95.52632 28,914,278 38
4.7 -6,866,391.7 94.09091 24,455,263 23
4.6 19,591,424.0 95.20588 20,768,750 34
4.5 5,915,980.4 96.03704 19,622,609 27
4.4 6,886,197.9 96.00000 25,737,500 20

ESTUDO POR GÊNERO DE FILMES

# Per-genre summary (mean rating, mean budget, mean profit, film count),
# showing the 20 genre combinations with the most films. top_n() keeps ties,
# so more than 20 rows may be printed.
imdb %>%
  group_by(generos) %>%
  summarise(
    nota_media      = mean(nota_imdb, na.rm = TRUE),
    media_orcamento = mean(orcamento, na.rm = TRUE),
    media_lucro     = mean(receita - orcamento, na.rm = TRUE),
    num_films       = n()
  ) %>%
  arrange(desc(num_films)) %>%
  top_n(20, num_films) %>%
  knitr::kable(
    caption = "GENEROS CON MAIOR NUMERO DE FILMS",
    format.args = list(big.mark = ",", scientific = FALSE)
  )
GENEROS CON MAIOR NUMERO DE FILMS
generos nota_media media_orcamento media_lucro num_films
Comedy 5.776216 20,211,987 21,367,568 185
Drama 6.933133 12,033,697 8,524,681 166
Comedy|Drama 6.496710 14,495,104 11,310,121 152
Comedy|Drama|Romance 6.429530 21,147,200 11,531,093 149
Comedy|Romance 5.894815 25,040,242 22,992,323 135
Drama|Romance 6.841748 20,609,415 12,055,360 103
Crime|Drama|Thriller 6.675343 25,664,160 10,103,622 73
Horror 5.392727 8,462,019 33,117,199 55
Action|Adventure|Sci-Fi 6.633333 144,115,556 43,027,809 45
Action|Crime|Thriller 6.288889 45,044,444 12,316,009 45
Action|Crime|Drama|Thriller 6.370455 36,995,122 4,035,793 44
Crime|Drama 7.306818 14,462,250 16,159,900 44
Comedy|Crime 5.869767 25,785,366 19,280,536 43
Horror|Thriller 5.361905 9,281,501 16,405,033 42
Drama|Thriller 6.267500 22,160,417 8,788,240 40
Crime|Drama|Mystery|Thriller 6.966667 32,992,000 14,269,760 39
Documentary 6.991667 1,067,132 1,247,920 36
Horror|Mystery|Thriller 5.893548 17,015,757 23,602,440 31
Action|Adventure|Sci-Fi|Thriller 6.300000 103,107,692 18,122,529 26
Adventure|Animation|Comedy|Family|Fantasy 6.430769 96,000,000 54,891,179 26
Drama|Sport 7.123077 18,772,174 19,222,457 26
# Per-genre summary (mean rating, mean budget, mean profit, film count),
# showing the 20 genre combinations with the highest mean profit.
imdb %>%
  group_by(generos) %>%
  summarise(
    nota_media      = mean(nota_imdb, na.rm = TRUE),
    media_orcamento = mean(orcamento, na.rm = TRUE),
    media_lucro     = mean(receita - orcamento, na.rm = TRUE),
    num_films       = n()
  ) %>%
  arrange(desc(media_lucro)) %>%
  top_n(20, media_lucro) %>%
  knitr::kable(
    caption = "GENEROS CON LUCRO MAIS ELEVADO",
    digits = 2,
    format.args = list(big.mark = ",", scientific = FALSE)
  )
GENEROS CON LUCRO MAIS ELEVADO
generos nota_media media_orcamento media_lucro num_films
Family|Sci-Fi 5.65 5,425,000 424,449,459 2
Adventure|Animation|Drama|Family|Musical 8.50 45,000,000 377,783,777 1
Action|Biography|Drama|History|Thriller|War 7.30 58,800,000 291,323,553 1
Adventure|Drama|Fantasy|Romance 5.00 79,333,333 217,148,557 3
Action|Adventure|Fantasy|Sci-Fi 6.98 97,125,000 199,559,758 13
Drama|Fantasy|Romance|Thriller 7.00 22,000,000 195,631,306 1
Drama|History|Romance|War 7.20 4,488,500 194,678,278 2
Adventure|Animation|Comedy|Drama|Family|Fantasy 8.30 175,000,000 181,454,367 1
Action|Adventure|Animation|Family 8.00 92,000,000 169,437,578 1
Adventure|Comedy|Family|Mystery|Sci-Fi 7.30 90,000,000 160,147,615 1
Animation|Comedy|Family|Sci-Fi 7.25 88,000,000 158,459,955 2
Adventure|Drama|Sci-Fi|Thriller 7.45 73,750,000 157,360,628 2
Biography|Drama|Family|Musical|Romance 8.00 8,200,000 155,014,286 1
Animation|Comedy|Family|Fantasy|Music 4.85 67,500,000 150,969,864 2
Adventure|Sci-Fi|Thriller 6.80 108,333,333 148,946,823 6
Adventure|Animation|Comedy|Family|Fantasy|Romance 7.25 100,000,000 143,310,828 2
Action|Adventure|Comedy|Romance|Sci-Fi 6.13 34,000,000 143,160,018 3
Action|Adventure|Comedy|Family|Fantasy 6.40 110,000,000 140,863,268 1
Action|Animation|Comedy|Family|Sci-Fi 6.85 102,000,000 140,183,548 2
Action|Adventure|Crime|Drama|Mystery|Thriller 7.80 44,000,000 139,875,760 1

ESTUDO POR DIRETORES

# Per-director summary (mean profit, mean rating, film count), showing the
# 20 directors with the highest mean profit.
imdb %>%
  group_by(diretor) %>%
  summarise(
    lucro_medio = mean(receita - orcamento, na.rm = TRUE),
    nota_media  = mean(nota_imdb, na.rm = TRUE),
    num_films   = n()
  ) %>%
  arrange(desc(lucro_medio)) %>%
  top_n(20, lucro_medio) %>%
  knitr::kable(
    caption = "20 DIRECTORES CON LUCRO MEDIO MAIS ELEVADO",
    digits = 2,
    format.args = list(big.mark = ",", scientific = FALSE)
  )
20 DIRECTORES CON LUCRO MEDIO MAIS ELEVADO
diretor lucro_medio nota_media num_films
Tim Miller 305,024,263 8.10 1
George Lucas 277,328,296 7.40 5
Richard Marquand 276,625,409 8.40 1
Irvin Kershner 272,158,751 8.80 1
Kyle Balda 262,029,560 6.40 1
Colin Trevorrow 252,717,532 7.00 2
Chris Buck 250,736,600 7.60 1
Pierre Coffin 237,275,640 7.60 2
Lee Unkrich 214,984,497 8.30 1
Joss Whedon 199,202,360 7.87 3
James Cameron 194,620,985 7.88 6
Roger Allers 188,543,668 7.35 2
William Cottrell 182,925,485 7.70 1
Pete Docter 158,113,780 8.23 3
Francis Lawrence 151,100,394 7.00 5
Daniel Myrick 140,470,114 6.40 1
Peter Jackson 132,967,515 8.02 5
Andrew Adamson 130,611,730 7.08 5
Joel Zwick 129,275,992 5.45 2
Sam Taylor-Johnson 126,147,885 4.10 1
# Per-director summary (mean profit, mean rating, film count), showing the
# 20 directors with the highest mean IMDB rating. top_n() keeps ties, so
# more than 20 rows may be printed.
imdb %>%
  group_by(diretor) %>%
  summarise(
    lucro_medio = mean(receita - orcamento, na.rm = TRUE),
    nota_media  = mean(nota_imdb, na.rm = TRUE),
    num_films   = n()
  ) %>%
  arrange(desc(nota_media)) %>%
  top_n(20, nota_media) %>%
  knitr::kable(
    caption = "20 DIRECTORES CON NOTA MEIA MAIS ELEVADA",
    digits = 2,
    format.args = list(big.mark = ",", scientific = FALSE)
  )
20 DIRECTORES CON NOTA MEIA MAIS ELEVADA
diretor lucro_medio nota_media num_films
Irvin Kershner 272,158,751 8.80 1
Cary Bell NaN 8.70 1
Mitchell Altieri NaN 8.70 1
Charles Chaplin -1,336,755 8.60 1
Mike Mayhall NaN 8.60 1
Damien Chazelle 9,792,000 8.50 1
Milos Forman 70,600,000 8.50 2
Ron Fricke -1,398,153 8.50 1
Stanley Kubrick NaN 8.45 2
Christopher Nolan 101,028,447 8.43 8
Bill Melendez NaN 8.40 1
Catherine Owens NaN 8.40 1
Jay Oliva NaN 8.40 1
Marius A. Markevicius -366,222 8.40 1
Richard Marquand 276,625,409 8.40 1
Robert Mulligan NaN 8.40 1
John Sturges NaN 8.30 1
Justin Paul Miller NaN 8.30 1
Lee Unkrich 214,984,497 8.30 1
Stanley Donen NaN 8.30 1
Sut Jhally NaN 8.30 1

RELAÇÃO ENTRE DURAÇÃO E LUCRO

# Per-duration summary: mean rating, mean profit and film count for each
# distinct running time, keeping the 20 durations with the highest mean
# profit. Stored as `graf` and then rendered as a table.
#
# The per-film "lucrou" flag that used to be computed here was discarded by
# summarise() and never reached the output, so it has been removed.
graf <- imdb %>%
  mutate(lucro = receita - orcamento) %>%
  group_by(duracao) %>%
  summarise(
    nota_media  = mean(nota_imdb, na.rm = TRUE),
    lucro_medio = mean(lucro, na.rm = TRUE),
    num_films   = n()
  ) %>%
  arrange(desc(lucro_medio)) %>%
  top_n(20, lucro_medio)

graf %>%
  knitr::kable(
    caption = "MAIOR LUCRO POR DURACAO",
    digits = 2,
    format.args = list(big.mark = ",", scientific = FALSE)
  )
MAIOR LUCRO POR DURACAO
duracao nota_media lucro_medio num_films
173 6.55 403,279,547 2
192 8.90 283,019,252 1
195 7.35 208,992,272 2
226 8.20 194,678,278 1
194 7.45 188,034,500 2
73 7.15 186,209,118 2
236 8.00 162,208,848 1
167 7.90 133,029,270 1
174 7.30 123,775,212 2
182 7.90 123,001,229 1
178 8.00 101,231,080 7
200 8.00 100,722,000 1
164 8.05 99,752,114 2
151 8.20 83,715,114 5
154 7.24 83,262,234 10
183 6.90 80,249,062 1
142 7.30 80,205,771 12
172 7.62 78,623,982 6
175 8.30 76,810,976 2
190 7.60 76,107,476 1
# Scatter plot of profit (x axis) against running time (y axis), with points
# coloured by whether the film made a profit ("SI") or not ("NO").
imdb %>%
  mutate(
    lucro  = receita - orcamento,
    lucrou = ifelse(lucro > 0, "SI", "NO")
  ) %>%
  ggplot() +
  geom_point(aes(x = lucro, y = duracao, color = lucrou)) +
  # Break-even marker. Profit is on the x axis, so the reference is the
  # vertical line lucro = 0; a y = x line would compare minutes to a
  # currency amount, which has no meaning.
  geom_vline(xintercept = 0, color = "blue") +
  labs(
    title = "Gráfico de dispersão",
    subtitle = "Lucro vs Duracao"
  )

CONCLUSÃO

Athos e o Fernando deveriam investir no filme de acordo com as seguintes variáveis:

# From the per-rating summary in `nota1`, pick the rating(s) with the largest
# number of films and show their mean profit, duration and budget.
Num_films <- nota1[["num_films"]]
vMax <- max(Num_films)

nota1 %>%
  filter(num_films == vMax) %>%
  select(-num_films) %>%
  knitr::kable(
    caption = " ",
    digits = 2,
    format.args = list(big.mark = ",", scientific = FALSE),
    col.names = c("Nota", "Lucro", "Duracao", "Orcamento")
  )
Nota Lucro Duracao Orcamento
6.7 13,048,875 109.16 40,519,064