#Import libraries
import pandas as pd
import requests
import tweepy
from tweepy import OAuthHandler
from tweepy import API
from tweepy import Cursor
import time
import datetime as dt
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
#data gathering section
Do the following activities:
1. Read from the archive file.
2. Read from the TSV file via its URL.
3. Read from Twitter via the Twitter API.
# Read the archive CSV file into a dataframe using pandas' read_csv function.
archive_df = pd.read_csv('twitter-archive-enhanced.csv')

# Download the image-predictions TSV file from its URL using requests.
url = 'https://d17h27t6h515a5.cloudfront.net/topher/2017/August/599fd2ad_image-predictions/image-predictions.tsv'
r = requests.get(url, allow_redirects=True)
# Stop on an HTTP error rather than saving an error page as if it were the TSV.
r.raise_for_status()

# Save and reload through one shared path so the dataframe is always built
# from the file that was just downloaded (the write and the read previously
# used two different file names).
images_path = 'image-predictions.tsv'
with open(images_path, 'wb') as tsv_file:
    tsv_file.write(r.content)

# Read the tab-separated file into a dataframe.
images_df = pd.read_csv(images_path, sep='\t', encoding='utf-8')
# Twitter API access: the four credentials below are fetched from the
# Twitter developer profile (masked here).
consumer_key = '**'
consumer_secret = '**'
access_token = '**'
access_token_secret = '**'

# Build the OAuth handler from the consumer pair, then attach the access pair.
auth = OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)

# Create the API client. The JSON parser makes calls return plain
# dict-like data; the rate-limit flags make the client wait (and print a
# notice) when the rate limit is reached instead of failing.
api = API(
    auth,
    parser=tweepy.parsers.JSONParser(),
    wait_on_rate_limit=True,
    wait_on_rate_limit_notify=True,
)
# List that collects one record (a dict) per tweet that could be fetched
tweet_df_list = []

# Look up every tweet id found in the archive dataframe
for tweet_id in archive_df['tweet_id']:
    try:
        page = api.get_status(tweet_id, tweet_mode='extended')
        # Pull the fields of interest straight out of the parsed response
        # and coerce the counters to plain ints.
        record = {
            'tweet_id': int(tweet_id),
            'favorites': int(page['favorite_count']),
            'retweets': int(page['retweet_count']),
            'user_followers': int(page['user']['followers_count']),
            'user_favourites': int(page['user']['favourites_count']),
            'date_time': pd.to_datetime(page['created_at']),
        }
        tweet_df_list.append(record)
    except Exception as e:
        # Best effort: report the failing id with its error and carry on
        # with the next tweet (e.g. tweets that no longer exist).
        print(str(tweet_id), str(e), sep=" _ ")

# Turn the collected records into a dataframe with a fixed column order
tweet_columns = ['tweet_id', 'favorites', 'retweets',
                 'user_followers', 'user_favourites', 'date_time']
tweet_df = pd.DataFrame(tweet_df_list, columns=tweet_columns)

# Persist the dataframe to disk as CSV (without the index column)
tweet_df.to_csv('tweet_list.txt', encoding='utf-8', index=False)
888202515573088257 _ [{'code': 144, 'message': 'No status found with that ID.'}] 873697596434513921 _ [{'code': 144, 'message': 'No status found with that ID.'}] 869988702071779329 _ [{'code': 144, 'message': 'No status found with that ID.'}] 866816280283807744 _ [{'code': 144, 'message': 'No status found with that ID.'}] 861769973181624320 _ [{'code': 144, 'message': 'No status found with that ID.'}] 845459076796616705 _ [{'code': 144, 'message': 'No status found with that ID.'}] 842892208864923648 _ [{'code': 144, 'message': 'No status found with that ID.'}] 837012587749474308 _ [{'code': 144, 'message': 'No status found with that ID.'}] 827228250799742977 _ [{'code': 144, 'message': 'No status found with that ID.'}] 802247111496568832 _ [{'code': 144, 'message': 'No status found with that ID.'}] 775096608509886464 _ [{'code': 144, 'message': 'No status found with that ID.'}] Rate limit reached. Sleeping for: 732 754011816964026368 _ [{'code': 144, 'message': 'No status found with that ID.'}] Rate limit reached. Sleeping for: 731
# Reload the tweet data from 'tweet_list.txt' (the CSV file written by the
# gathering step) into a dataframe.
tweet_df = pd.read_csv('tweet_list.txt', encoding = 'utf-8')
Data gathering section ends.
Data assessment section begins.
# Display the archive dataframe for a visual (eyeball) check
archive_df
tweet_id | in_reply_to_status_id | in_reply_to_user_id | timestamp | source | text | retweeted_status_id | retweeted_status_user_id | retweeted_status_timestamp | expanded_urls | rating_numerator | rating_denominator | name | doggo | floofer | pupper | puppo | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 892420643555336193 | NaN | NaN | 2017-08-01 16:23:56 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Phineas. He's a mystical boy. Only eve... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/892420643... | 13 | 10 | Phineas | None | None | None | None |
1 | 892177421306343426 | NaN | NaN | 2017-08-01 00:17:27 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Tilly. She's just checking pup on you.... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/892177421... | 13 | 10 | Tilly | None | None | None | None |
2 | 891815181378084864 | NaN | NaN | 2017-07-31 00:18:03 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Archie. He is a rare Norwegian Pouncin... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/891815181... | 12 | 10 | Archie | None | None | None | None |
3 | 891689557279858688 | NaN | NaN | 2017-07-30 15:58:51 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Darla. She commenced a snooze mid meal... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/891689557... | 13 | 10 | Darla | None | None | None | None |
4 | 891327558926688256 | NaN | NaN | 2017-07-29 16:00:24 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Franklin. He would like you to stop ca... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/891327558... | 12 | 10 | Franklin | None | None | None | None |
5 | 891087950875897856 | NaN | NaN | 2017-07-29 00:08:17 +0000 | <a href="http://twitter.com/download/iphone" r... | Here we have a majestic great white breaching ... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/891087950... | 13 | 10 | None | None | None | None | None |
6 | 890971913173991426 | NaN | NaN | 2017-07-28 16:27:12 +0000 | <a href="http://twitter.com/download/iphone" r... | Meet Jax. He enjoys ice cream so much he gets ... | NaN | NaN | NaN | https://gofundme.com/ydvmve-surgery-for-jax,ht... | 13 | 10 | Jax | None | None | None | None |
7 | 890729181411237888 | NaN | NaN | 2017-07-28 00:22:40 +0000 | <a href="http://twitter.com/download/iphone" r... | When you watch your owner call another dog a g... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/890729181... | 13 | 10 | None | None | None | None | None |
8 | 890609185150312448 | NaN | NaN | 2017-07-27 16:25:51 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Zoey. She doesn't want to be one of th... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/890609185... | 13 | 10 | Zoey | None | None | None | None |
9 | 890240255349198849 | NaN | NaN | 2017-07-26 15:59:51 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Cassie. She is a college pup. Studying... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/890240255... | 14 | 10 | Cassie | doggo | None | None | None |
10 | 890006608113172480 | NaN | NaN | 2017-07-26 00:31:25 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Koda. He is a South Australian decksha... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/890006608... | 13 | 10 | Koda | None | None | None | None |
11 | 889880896479866881 | NaN | NaN | 2017-07-25 16:11:53 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Bruno. He is a service shark. Only get... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/889880896... | 13 | 10 | Bruno | None | None | None | None |
12 | 889665388333682689 | NaN | NaN | 2017-07-25 01:55:32 +0000 | <a href="http://twitter.com/download/iphone" r... | Here's a puppo that seems to be on the fence a... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/889665388... | 13 | 10 | None | None | None | None | puppo |
13 | 889638837579907072 | NaN | NaN | 2017-07-25 00:10:02 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Ted. He does his best. Sometimes that'... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/889638837... | 12 | 10 | Ted | None | None | None | None |
14 | 889531135344209921 | NaN | NaN | 2017-07-24 17:02:04 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Stuart. He's sporting his favorite fan... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/889531135... | 13 | 10 | Stuart | None | None | None | puppo |
15 | 889278841981685760 | NaN | NaN | 2017-07-24 00:19:32 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Oliver. You're witnessing one of his m... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/889278841... | 13 | 10 | Oliver | None | None | None | None |
16 | 888917238123831296 | NaN | NaN | 2017-07-23 00:22:39 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Jim. He found a fren. Taught him how t... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/888917238... | 12 | 10 | Jim | None | None | None | None |
17 | 888804989199671297 | NaN | NaN | 2017-07-22 16:56:37 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Zeke. He has a new stick. Very proud o... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/888804989... | 13 | 10 | Zeke | None | None | None | None |
18 | 888554962724278272 | NaN | NaN | 2017-07-22 00:23:06 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Ralphus. He's powering up. Attempting ... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/888554962... | 13 | 10 | Ralphus | None | None | None | None |
19 | 888202515573088257 | NaN | NaN | 2017-07-21 01:02:36 +0000 | <a href="http://twitter.com/download/iphone" r... | RT @dog_rates: This is Canela. She attempted s... | 8.874740e+17 | 4.196984e+09 | 2017-07-19 00:47:34 +0000 | https://twitter.com/dog_rates/status/887473957... | 13 | 10 | Canela | None | None | None | None |
20 | 888078434458587136 | NaN | NaN | 2017-07-20 16:49:33 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Gerald. He was just told he didn't get... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/888078434... | 12 | 10 | Gerald | None | None | None | None |
21 | 887705289381826560 | NaN | NaN | 2017-07-19 16:06:48 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Jeffrey. He has a monopoly on the pool... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/887705289... | 13 | 10 | Jeffrey | None | None | None | None |
22 | 887517139158093824 | NaN | NaN | 2017-07-19 03:39:09 +0000 | <a href="http://twitter.com/download/iphone" r... | I've yet to rate a Venezuelan Hover Wiener. Th... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/887517139... | 14 | 10 | such | None | None | None | None |
23 | 887473957103951883 | NaN | NaN | 2017-07-19 00:47:34 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Canela. She attempted some fancy porch... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/887473957... | 13 | 10 | Canela | None | None | None | None |
24 | 887343217045368832 | NaN | NaN | 2017-07-18 16:08:03 +0000 | <a href="http://twitter.com/download/iphone" r... | You may not have known you needed to see this ... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/887343217... | 13 | 10 | None | None | None | None | None |
25 | 887101392804085760 | NaN | NaN | 2017-07-18 00:07:08 +0000 | <a href="http://twitter.com/download/iphone" r... | This... is a Jubilant Antarctic House Bear. We... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/887101392... | 12 | 10 | None | None | None | None | None |
26 | 886983233522544640 | NaN | NaN | 2017-07-17 16:17:36 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Maya. She's very shy. Rarely leaves he... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/886983233... | 13 | 10 | Maya | None | None | None | None |
27 | 886736880519319552 | NaN | NaN | 2017-07-16 23:58:41 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Mingus. He's a wonderful father to his... | NaN | NaN | NaN | https://www.gofundme.com/mingusneedsus,https:/... | 13 | 10 | Mingus | None | None | None | None |
28 | 886680336477933568 | NaN | NaN | 2017-07-16 20:14:00 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Derek. He's late for a dog meeting. 13... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/886680336... | 13 | 10 | Derek | None | None | None | None |
29 | 886366144734445568 | NaN | NaN | 2017-07-15 23:25:31 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Roscoe. Another pupper fallen victim t... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/886366144... | 12 | 10 | Roscoe | None | None | pupper | None |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
2326 | 666411507551481857 | NaN | NaN | 2015-11-17 00:24:19 +0000 | <a href="http://twitter.com/download/iphone" r... | This is quite the dog. Gets really excited whe... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/666411507... | 2 | 10 | quite | None | None | None | None |
2327 | 666407126856765440 | NaN | NaN | 2015-11-17 00:06:54 +0000 | <a href="http://twitter.com/download/iphone" r... | This is a southern Vesuvius bumblegruff. Can d... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/666407126... | 7 | 10 | a | None | None | None | None |
2328 | 666396247373291520 | NaN | NaN | 2015-11-16 23:23:41 +0000 | <a href="http://twitter.com/download/iphone" r... | Oh goodness. A super rare northeast Qdoba kang... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/666396247... | 9 | 10 | None | None | None | None | None |
2329 | 666373753744588802 | NaN | NaN | 2015-11-16 21:54:18 +0000 | <a href="http://twitter.com/download/iphone" r... | Those are sunglasses and a jean jacket. 11/10 ... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/666373753... | 11 | 10 | None | None | None | None | None |
2330 | 666362758909284353 | NaN | NaN | 2015-11-16 21:10:36 +0000 | <a href="http://twitter.com/download/iphone" r... | Unique dog here. Very small. Lives in containe... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/666362758... | 6 | 10 | None | None | None | None | None |
2331 | 666353288456101888 | NaN | NaN | 2015-11-16 20:32:58 +0000 | <a href="http://twitter.com/download/iphone" r... | Here we have a mixed Asiago from the Galápagos... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/666353288... | 8 | 10 | None | None | None | None | None |
2332 | 666345417576210432 | NaN | NaN | 2015-11-16 20:01:42 +0000 | <a href="http://twitter.com/download/iphone" r... | Look at this jokester thinking seat belt laws ... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/666345417... | 10 | 10 | None | None | None | None | None |
2333 | 666337882303524864 | NaN | NaN | 2015-11-16 19:31:45 +0000 | <a href="http://twitter.com/download/iphone" r... | This is an extremely rare horned Parthenon. No... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/666337882... | 9 | 10 | an | None | None | None | None |
2334 | 666293911632134144 | NaN | NaN | 2015-11-16 16:37:02 +0000 | <a href="http://twitter.com/download/iphone" r... | This is a funny dog. Weird toes. Won't come do... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/666293911... | 3 | 10 | a | None | None | None | None |
2335 | 666287406224695296 | NaN | NaN | 2015-11-16 16:11:11 +0000 | <a href="http://twitter.com/download/iphone" r... | This is an Albanian 3 1/2 legged Episcopalian... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/666287406... | 1 | 2 | an | None | None | None | None |
2336 | 666273097616637952 | NaN | NaN | 2015-11-16 15:14:19 +0000 | <a href="http://twitter.com/download/iphone" r... | Can take selfies 11/10 https://t.co/ws2AMaNwPW | NaN | NaN | NaN | https://twitter.com/dog_rates/status/666273097... | 11 | 10 | None | None | None | None | None |
2337 | 666268910803644416 | NaN | NaN | 2015-11-16 14:57:41 +0000 | <a href="http://twitter.com/download/iphone" r... | Very concerned about fellow dog trapped in com... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/666268910... | 10 | 10 | None | None | None | None | None |
2338 | 666104133288665088 | NaN | NaN | 2015-11-16 04:02:55 +0000 | <a href="http://twitter.com/download/iphone" r... | Not familiar with this breed. No tail (weird).... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/666104133... | 1 | 10 | None | None | None | None | None |
2339 | 666102155909144576 | NaN | NaN | 2015-11-16 03:55:04 +0000 | <a href="http://twitter.com/download/iphone" r... | Oh my. Here you are seeing an Adobe Setter giv... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/666102155... | 11 | 10 | None | None | None | None | None |
2340 | 666099513787052032 | NaN | NaN | 2015-11-16 03:44:34 +0000 | <a href="http://twitter.com/download/iphone" r... | Can stand on stump for what seems like a while... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/666099513... | 8 | 10 | None | None | None | None | None |
2341 | 666094000022159362 | NaN | NaN | 2015-11-16 03:22:39 +0000 | <a href="http://twitter.com/download/iphone" r... | This appears to be a Mongolian Presbyterian mi... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/666094000... | 9 | 10 | None | None | None | None | None |
2342 | 666082916733198337 | NaN | NaN | 2015-11-16 02:38:37 +0000 | <a href="http://twitter.com/download/iphone" r... | Here we have a well-established sunblockerspan... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/666082916... | 6 | 10 | None | None | None | None | None |
2343 | 666073100786774016 | NaN | NaN | 2015-11-16 01:59:36 +0000 | <a href="http://twitter.com/download/iphone" r... | Let's hope this flight isn't Malaysian (lol). ... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/666073100... | 10 | 10 | None | None | None | None | None |
2344 | 666071193221509120 | NaN | NaN | 2015-11-16 01:52:02 +0000 | <a href="http://twitter.com/download/iphone" r... | Here we have a northern speckled Rhododendron.... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/666071193... | 9 | 10 | None | None | None | None | None |
2345 | 666063827256086533 | NaN | NaN | 2015-11-16 01:22:45 +0000 | <a href="http://twitter.com/download/iphone" r... | This is the happiest dog you will ever see. Ve... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/666063827... | 10 | 10 | the | None | None | None | None |
2346 | 666058600524156928 | NaN | NaN | 2015-11-16 01:01:59 +0000 | <a href="http://twitter.com/download/iphone" r... | Here is the Rand Paul of retrievers folks! He'... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/666058600... | 8 | 10 | the | None | None | None | None |
2347 | 666057090499244032 | NaN | NaN | 2015-11-16 00:55:59 +0000 | <a href="http://twitter.com/download/iphone" r... | My oh my. This is a rare blond Canadian terrie... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/666057090... | 9 | 10 | a | None | None | None | None |
2348 | 666055525042405380 | NaN | NaN | 2015-11-16 00:49:46 +0000 | <a href="http://twitter.com/download/iphone" r... | Here is a Siberian heavily armored polar bear ... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/666055525... | 10 | 10 | a | None | None | None | None |
2349 | 666051853826850816 | NaN | NaN | 2015-11-16 00:35:11 +0000 | <a href="http://twitter.com/download/iphone" r... | This is an odd dog. Hard on the outside but lo... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/666051853... | 2 | 10 | an | None | None | None | None |
2350 | 666050758794694657 | NaN | NaN | 2015-11-16 00:30:50 +0000 | <a href="http://twitter.com/download/iphone" r... | This is a truly beautiful English Wilson Staff... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/666050758... | 10 | 10 | a | None | None | None | None |
2351 | 666049248165822465 | NaN | NaN | 2015-11-16 00:24:50 +0000 | <a href="http://twitter.com/download/iphone" r... | Here we have a 1949 1st generation vulpix. Enj... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/666049248... | 5 | 10 | None | None | None | None | None |
2352 | 666044226329800704 | NaN | NaN | 2015-11-16 00:04:52 +0000 | <a href="http://twitter.com/download/iphone" r... | This is a purebred Piers Morgan. Loves to Netf... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/666044226... | 6 | 10 | a | None | None | None | None |
2353 | 666033412701032449 | NaN | NaN | 2015-11-15 23:21:54 +0000 | <a href="http://twitter.com/download/iphone" r... | Here is a very happy pup. Big fan of well-main... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/666033412... | 9 | 10 | a | None | None | None | None |
2354 | 666029285002620928 | NaN | NaN | 2015-11-15 23:05:30 +0000 | <a href="http://twitter.com/download/iphone" r... | This is a western brown Mitsubishi terrier. Up... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/666029285... | 7 | 10 | a | None | None | None | None |
2355 | 666020888022790149 | NaN | NaN | 2015-11-15 22:32:08 +0000 | <a href="http://twitter.com/download/iphone" r... | Here we have a Japanese Irish Setter. Lost eye... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/666020888... | 8 | 10 | None | None | None | None | None |
2356 rows × 17 columns
# Display the image-predictions dataframe for a visual (eyeball) check
images_df
tweet_id | jpg_url | img_num | p1 | p1_conf | p1_dog | p2 | p2_conf | p2_dog | p3 | p3_conf | p3_dog | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 666020888022790149 | https://pbs.twimg.com/media/CT4udn0WwAA0aMy.jpg | 1 | Welsh_springer_spaniel | 0.465074 | True | collie | 0.156665 | True | Shetland_sheepdog | 0.061428 | True |
1 | 666029285002620928 | https://pbs.twimg.com/media/CT42GRgUYAA5iDo.jpg | 1 | redbone | 0.506826 | True | miniature_pinscher | 0.074192 | True | Rhodesian_ridgeback | 0.072010 | True |
2 | 666033412701032449 | https://pbs.twimg.com/media/CT4521TWwAEvMyu.jpg | 1 | German_shepherd | 0.596461 | True | malinois | 0.138584 | True | bloodhound | 0.116197 | True |
3 | 666044226329800704 | https://pbs.twimg.com/media/CT5Dr8HUEAA-lEu.jpg | 1 | Rhodesian_ridgeback | 0.408143 | True | redbone | 0.360687 | True | miniature_pinscher | 0.222752 | True |
4 | 666049248165822465 | https://pbs.twimg.com/media/CT5IQmsXIAAKY4A.jpg | 1 | miniature_pinscher | 0.560311 | True | Rottweiler | 0.243682 | True | Doberman | 0.154629 | True |
5 | 666050758794694657 | https://pbs.twimg.com/media/CT5Jof1WUAEuVxN.jpg | 1 | Bernese_mountain_dog | 0.651137 | True | English_springer | 0.263788 | True | Greater_Swiss_Mountain_dog | 0.016199 | True |
6 | 666051853826850816 | https://pbs.twimg.com/media/CT5KoJ1WoAAJash.jpg | 1 | box_turtle | 0.933012 | False | mud_turtle | 0.045885 | False | terrapin | 0.017885 | False |
7 | 666055525042405380 | https://pbs.twimg.com/media/CT5N9tpXIAAifs1.jpg | 1 | chow | 0.692517 | True | Tibetan_mastiff | 0.058279 | True | fur_coat | 0.054449 | False |
8 | 666057090499244032 | https://pbs.twimg.com/media/CT5PY90WoAAQGLo.jpg | 1 | shopping_cart | 0.962465 | False | shopping_basket | 0.014594 | False | golden_retriever | 0.007959 | True |
9 | 666058600524156928 | https://pbs.twimg.com/media/CT5Qw94XAAA_2dP.jpg | 1 | miniature_poodle | 0.201493 | True | komondor | 0.192305 | True | soft-coated_wheaten_terrier | 0.082086 | True |
10 | 666063827256086533 | https://pbs.twimg.com/media/CT5Vg_wXIAAXfnj.jpg | 1 | golden_retriever | 0.775930 | True | Tibetan_mastiff | 0.093718 | True | Labrador_retriever | 0.072427 | True |
11 | 666071193221509120 | https://pbs.twimg.com/media/CT5cN_3WEAAlOoZ.jpg | 1 | Gordon_setter | 0.503672 | True | Yorkshire_terrier | 0.174201 | True | Pekinese | 0.109454 | True |
12 | 666073100786774016 | https://pbs.twimg.com/media/CT5d9DZXAAALcwe.jpg | 1 | Walker_hound | 0.260857 | True | English_foxhound | 0.175382 | True | Ibizan_hound | 0.097471 | True |
13 | 666082916733198337 | https://pbs.twimg.com/media/CT5m4VGWEAAtKc8.jpg | 1 | pug | 0.489814 | True | bull_mastiff | 0.404722 | True | French_bulldog | 0.048960 | True |
14 | 666094000022159362 | https://pbs.twimg.com/media/CT5w9gUW4AAsBNN.jpg | 1 | bloodhound | 0.195217 | True | German_shepherd | 0.078260 | True | malinois | 0.075628 | True |
15 | 666099513787052032 | https://pbs.twimg.com/media/CT51-JJUEAA6hV8.jpg | 1 | Lhasa | 0.582330 | True | Shih-Tzu | 0.166192 | True | Dandie_Dinmont | 0.089688 | True |
16 | 666102155909144576 | https://pbs.twimg.com/media/CT54YGiWUAEZnoK.jpg | 1 | English_setter | 0.298617 | True | Newfoundland | 0.149842 | True | borzoi | 0.133649 | True |
17 | 666104133288665088 | https://pbs.twimg.com/media/CT56LSZWoAAlJj2.jpg | 1 | hen | 0.965932 | False | cock | 0.033919 | False | partridge | 0.000052 | False |
18 | 666268910803644416 | https://pbs.twimg.com/media/CT8QCd1WEAADXws.jpg | 1 | desktop_computer | 0.086502 | False | desk | 0.085547 | False | bookcase | 0.079480 | False |
19 | 666273097616637952 | https://pbs.twimg.com/media/CT8T1mtUwAA3aqm.jpg | 1 | Italian_greyhound | 0.176053 | True | toy_terrier | 0.111884 | True | basenji | 0.111152 | True |
20 | 666287406224695296 | https://pbs.twimg.com/media/CT8g3BpUEAAuFjg.jpg | 1 | Maltese_dog | 0.857531 | True | toy_poodle | 0.063064 | True | miniature_poodle | 0.025581 | True |
21 | 666293911632134144 | https://pbs.twimg.com/media/CT8mx7KW4AEQu8N.jpg | 1 | three-toed_sloth | 0.914671 | False | otter | 0.015250 | False | great_grey_owl | 0.013207 | False |
22 | 666337882303524864 | https://pbs.twimg.com/media/CT9OwFIWEAMuRje.jpg | 1 | ox | 0.416669 | False | Newfoundland | 0.278407 | True | groenendael | 0.102643 | True |
23 | 666345417576210432 | https://pbs.twimg.com/media/CT9Vn7PWoAA_ZCM.jpg | 1 | golden_retriever | 0.858744 | True | Chesapeake_Bay_retriever | 0.054787 | True | Labrador_retriever | 0.014241 | True |
24 | 666353288456101888 | https://pbs.twimg.com/media/CT9cx0tUEAAhNN_.jpg | 1 | malamute | 0.336874 | True | Siberian_husky | 0.147655 | True | Eskimo_dog | 0.093412 | True |
25 | 666362758909284353 | https://pbs.twimg.com/media/CT9lXGsUcAAyUFt.jpg | 1 | guinea_pig | 0.996496 | False | skunk | 0.002402 | False | hamster | 0.000461 | False |
26 | 666373753744588802 | https://pbs.twimg.com/media/CT9vZEYWUAAlZ05.jpg | 1 | soft-coated_wheaten_terrier | 0.326467 | True | Afghan_hound | 0.259551 | True | briard | 0.206803 | True |
27 | 666396247373291520 | https://pbs.twimg.com/media/CT-D2ZHWIAA3gK1.jpg | 1 | Chihuahua | 0.978108 | True | toy_terrier | 0.009397 | True | papillon | 0.004577 | True |
28 | 666407126856765440 | https://pbs.twimg.com/media/CT-NvwmW4AAugGZ.jpg | 1 | black-and-tan_coonhound | 0.529139 | True | bloodhound | 0.244220 | True | flat-coated_retriever | 0.173810 | True |
29 | 666411507551481857 | https://pbs.twimg.com/media/CT-RugiWIAELEaq.jpg | 1 | coho | 0.404640 | False | barracouta | 0.271485 | False | gar | 0.189945 | False |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
2045 | 886366144734445568 | https://pbs.twimg.com/media/DE0BTnQUwAApKEH.jpg | 1 | French_bulldog | 0.999201 | True | Chihuahua | 0.000361 | True | Boston_bull | 0.000076 | True |
2046 | 886680336477933568 | https://pbs.twimg.com/media/DE4fEDzWAAAyHMM.jpg | 1 | convertible | 0.738995 | False | sports_car | 0.139952 | False | car_wheel | 0.044173 | False |
2047 | 886736880519319552 | https://pbs.twimg.com/media/DE5Se8FXcAAJFx4.jpg | 1 | kuvasz | 0.309706 | True | Great_Pyrenees | 0.186136 | True | Dandie_Dinmont | 0.086346 | True |
2048 | 886983233522544640 | https://pbs.twimg.com/media/DE8yicJW0AAAvBJ.jpg | 2 | Chihuahua | 0.793469 | True | toy_terrier | 0.143528 | True | can_opener | 0.032253 | False |
2049 | 887101392804085760 | https://pbs.twimg.com/media/DE-eAq6UwAA-jaE.jpg | 1 | Samoyed | 0.733942 | True | Eskimo_dog | 0.035029 | True | Staffordshire_bullterrier | 0.029705 | True |
2050 | 887343217045368832 | https://pbs.twimg.com/ext_tw_video_thumb/88734... | 1 | Mexican_hairless | 0.330741 | True | sea_lion | 0.275645 | False | Weimaraner | 0.134203 | True |
2051 | 887473957103951883 | https://pbs.twimg.com/media/DFDw2tyUQAAAFke.jpg | 2 | Pembroke | 0.809197 | True | Rhodesian_ridgeback | 0.054950 | True | beagle | 0.038915 | True |
2052 | 887517139158093824 | https://pbs.twimg.com/ext_tw_video_thumb/88751... | 1 | limousine | 0.130432 | False | tow_truck | 0.029175 | False | shopping_cart | 0.026321 | False |
2053 | 887705289381826560 | https://pbs.twimg.com/media/DFHDQBbXgAEqY7t.jpg | 1 | basset | 0.821664 | True | redbone | 0.087582 | True | Weimaraner | 0.026236 | True |
2054 | 888078434458587136 | https://pbs.twimg.com/media/DFMWn56WsAAkA7B.jpg | 1 | French_bulldog | 0.995026 | True | pug | 0.000932 | True | bull_mastiff | 0.000903 | True |
2055 | 888202515573088257 | https://pbs.twimg.com/media/DFDw2tyUQAAAFke.jpg | 2 | Pembroke | 0.809197 | True | Rhodesian_ridgeback | 0.054950 | True | beagle | 0.038915 | True |
2056 | 888554962724278272 | https://pbs.twimg.com/media/DFTH_O-UQAACu20.jpg | 3 | Siberian_husky | 0.700377 | True | Eskimo_dog | 0.166511 | True | malamute | 0.111411 | True |
2057 | 888804989199671297 | https://pbs.twimg.com/media/DFWra-3VYAA2piG.jpg | 1 | golden_retriever | 0.469760 | True | Labrador_retriever | 0.184172 | True | English_setter | 0.073482 | True |
2058 | 888917238123831296 | https://pbs.twimg.com/media/DFYRgsOUQAARGhO.jpg | 1 | golden_retriever | 0.714719 | True | Tibetan_mastiff | 0.120184 | True | Labrador_retriever | 0.105506 | True |
2059 | 889278841981685760 | https://pbs.twimg.com/ext_tw_video_thumb/88927... | 1 | whippet | 0.626152 | True | borzoi | 0.194742 | True | Saluki | 0.027351 | True |
2060 | 889531135344209921 | https://pbs.twimg.com/media/DFg_2PVW0AEHN3p.jpg | 1 | golden_retriever | 0.953442 | True | Labrador_retriever | 0.013834 | True | redbone | 0.007958 | True |
2061 | 889638837579907072 | https://pbs.twimg.com/media/DFihzFfXsAYGDPR.jpg | 1 | French_bulldog | 0.991650 | True | boxer | 0.002129 | True | Staffordshire_bullterrier | 0.001498 | True |
2062 | 889665388333682689 | https://pbs.twimg.com/media/DFi579UWsAAatzw.jpg | 1 | Pembroke | 0.966327 | True | Cardigan | 0.027356 | True | basenji | 0.004633 | True |
2063 | 889880896479866881 | https://pbs.twimg.com/media/DFl99B1WsAITKsg.jpg | 1 | French_bulldog | 0.377417 | True | Labrador_retriever | 0.151317 | True | muzzle | 0.082981 | False |
2064 | 890006608113172480 | https://pbs.twimg.com/media/DFnwSY4WAAAMliS.jpg | 1 | Samoyed | 0.957979 | True | Pomeranian | 0.013884 | True | chow | 0.008167 | True |
2065 | 890240255349198849 | https://pbs.twimg.com/media/DFrEyVuW0AAO3t9.jpg | 1 | Pembroke | 0.511319 | True | Cardigan | 0.451038 | True | Chihuahua | 0.029248 | True |
2066 | 890609185150312448 | https://pbs.twimg.com/media/DFwUU__XcAEpyXI.jpg | 1 | Irish_terrier | 0.487574 | True | Irish_setter | 0.193054 | True | Chesapeake_Bay_retriever | 0.118184 | True |
2067 | 890729181411237888 | https://pbs.twimg.com/media/DFyBahAVwAAhUTd.jpg | 2 | Pomeranian | 0.566142 | True | Eskimo_dog | 0.178406 | True | Pembroke | 0.076507 | True |
2068 | 890971913173991426 | https://pbs.twimg.com/media/DF1eOmZXUAALUcq.jpg | 1 | Appenzeller | 0.341703 | True | Border_collie | 0.199287 | True | ice_lolly | 0.193548 | False |
2069 | 891087950875897856 | https://pbs.twimg.com/media/DF3HwyEWsAABqE6.jpg | 1 | Chesapeake_Bay_retriever | 0.425595 | True | Irish_terrier | 0.116317 | True | Indian_elephant | 0.076902 | False |
2070 | 891327558926688256 | https://pbs.twimg.com/media/DF6hr6BUMAAzZgT.jpg | 2 | basset | 0.555712 | True | English_springer | 0.225770 | True | German_short-haired_pointer | 0.175219 | True |
2071 | 891689557279858688 | https://pbs.twimg.com/media/DF_q7IAWsAEuuN8.jpg | 1 | paper_towel | 0.170278 | False | Labrador_retriever | 0.168086 | True | spatula | 0.040836 | False |
2072 | 891815181378084864 | https://pbs.twimg.com/media/DGBdLU1WsAANxJ9.jpg | 1 | Chihuahua | 0.716012 | True | malamute | 0.078253 | True | kelpie | 0.031379 | True |
2073 | 892177421306343426 | https://pbs.twimg.com/media/DGGmoV4XsAAUL6n.jpg | 1 | Chihuahua | 0.323581 | True | Pekinese | 0.090647 | True | papillon | 0.068957 | True |
2074 | 892420643555336193 | https://pbs.twimg.com/media/DGKD1-bXoAAIAUK.jpg | 1 | orange | 0.097049 | False | bagel | 0.085851 | False | banana | 0.076110 | False |
2075 rows × 12 columns
# Display the tweet dataframe for a visual (eyeball) check.
tweet_df
tweet_id | favorites | retweets | user_followers | user_favourites | date_time | |
---|---|---|---|---|---|---|
0 | 892420643555336193 | 38680 | 8559 | 6997101 | 134700 | 2017-08-01 16:23:56 |
1 | 892177421306343426 | 33156 | 6291 | 6997101 | 134700 | 2017-08-01 00:17:27 |
2 | 891815181378084864 | 24958 | 4170 | 6997101 | 134700 | 2017-07-31 00:18:03 |
3 | 891689557279858688 | 42064 | 8687 | 6997101 | 134700 | 2017-07-30 15:58:51 |
4 | 891327558926688256 | 40212 | 9447 | 6997101 | 134700 | 2017-07-29 16:00:24 |
5 | 891087950875897856 | 20163 | 3127 | 6997101 | 134700 | 2017-07-29 00:08:17 |
6 | 890971913173991426 | 11816 | 2082 | 6997101 | 134700 | 2017-07-28 16:27:12 |
7 | 890729181411237888 | 65338 | 18971 | 6997101 | 134700 | 2017-07-28 00:22:40 |
8 | 890609185150312448 | 27713 | 4279 | 6997101 | 134700 | 2017-07-27 16:25:51 |
9 | 890240255349198849 | 31857 | 7448 | 6997101 | 134700 | 2017-07-26 15:59:51 |
10 | 890006608113172480 | 30583 | 7363 | 6997101 | 134700 | 2017-07-26 00:31:25 |
11 | 889880896479866881 | 27719 | 4989 | 6997101 | 134700 | 2017-07-25 16:11:53 |
12 | 889665388333682689 | 48003 | 10101 | 6997101 | 134700 | 2017-07-25 01:55:32 |
13 | 889638837579907072 | 27103 | 4564 | 6997101 | 134700 | 2017-07-25 00:10:02 |
14 | 889531135344209921 | 15052 | 2244 | 6997101 | 134700 | 2017-07-24 17:02:04 |
15 | 889278841981685760 | 25236 | 5446 | 6997101 | 134700 | 2017-07-24 00:19:32 |
16 | 888917238123831296 | 28996 | 4517 | 6997101 | 134700 | 2017-07-23 00:22:39 |
17 | 888804989199671297 | 25527 | 4363 | 6997101 | 134700 | 2017-07-22 16:56:37 |
18 | 888554962724278272 | 19850 | 3596 | 6997101 | 134700 | 2017-07-22 00:23:06 |
19 | 888078434458587136 | 21702 | 3511 | 6997101 | 134700 | 2017-07-20 16:49:33 |
20 | 887705289381826560 | 30108 | 5414 | 6997101 | 134700 | 2017-07-19 16:06:48 |
21 | 887517139158093824 | 46119 | 11713 | 6997101 | 134700 | 2017-07-19 03:39:09 |
22 | 887473957103951883 | 68935 | 18305 | 6997101 | 134700 | 2017-07-19 00:47:34 |
23 | 887343217045368832 | 33582 | 10446 | 6997101 | 134700 | 2017-07-18 16:08:03 |
24 | 887101392804085760 | 30451 | 5984 | 6997101 | 134700 | 2017-07-18 00:07:08 |
25 | 886983233522544640 | 35058 | 7806 | 6997101 | 134700 | 2017-07-17 16:17:36 |
26 | 886736880519319552 | 12037 | 3311 | 6997101 | 134700 | 2017-07-16 23:58:41 |
27 | 886680336477933568 | 22362 | 4486 | 6997101 | 134700 | 2017-07-16 20:14:00 |
28 | 886366144734445568 | 21142 | 3208 | 6997102 | 134700 | 2017-07-15 23:25:31 |
29 | 886267009285017600 | 116 | 4 | 6997102 | 134700 | 2017-07-15 16:51:35 |
... | ... | ... | ... | ... | ... | ... |
2314 | 666411507551481857 | 447 | 328 | 6997189 | 134700 | 2015-11-17 00:24:19 |
2315 | 666407126856765440 | 110 | 41 | 6997189 | 134700 | 2015-11-17 00:06:54 |
2316 | 666396247373291520 | 167 | 86 | 6997189 | 134700 | 2015-11-16 23:23:41 |
2317 | 666373753744588802 | 190 | 93 | 6997189 | 134700 | 2015-11-16 21:54:18 |
2318 | 666362758909284353 | 778 | 574 | 6997189 | 134700 | 2015-11-16 21:10:36 |
2319 | 666353288456101888 | 221 | 73 | 6997190 | 134700 | 2015-11-16 20:32:58 |
2320 | 666345417576210432 | 299 | 139 | 6997190 | 134700 | 2015-11-16 20:01:42 |
2321 | 666337882303524864 | 198 | 92 | 6997190 | 134700 | 2015-11-16 19:31:45 |
2322 | 666293911632134144 | 509 | 357 | 6997190 | 134700 | 2015-11-16 16:37:02 |
2323 | 666287406224695296 | 149 | 66 | 6997190 | 134700 | 2015-11-16 16:11:11 |
2324 | 666273097616637952 | 176 | 76 | 6997190 | 134700 | 2015-11-16 15:14:19 |
2325 | 666268910803644416 | 104 | 35 | 6997190 | 134700 | 2015-11-16 14:57:41 |
2326 | 666104133288665088 | 14346 | 6634 | 6997190 | 134700 | 2015-11-16 04:02:55 |
2327 | 666102155909144576 | 80 | 13 | 6997190 | 134700 | 2015-11-16 03:55:04 |
2328 | 666099513787052032 | 156 | 68 | 6997190 | 134700 | 2015-11-16 03:44:34 |
2329 | 666094000022159362 | 164 | 74 | 6997190 | 134700 | 2015-11-16 03:22:39 |
2330 | 666082916733198337 | 119 | 45 | 6997190 | 134700 | 2015-11-16 02:38:37 |
2331 | 666073100786774016 | 322 | 164 | 6997190 | 134700 | 2015-11-16 01:59:36 |
2332 | 666071193221509120 | 148 | 62 | 6997190 | 134700 | 2015-11-16 01:52:02 |
2333 | 666063827256086533 | 476 | 219 | 6997190 | 134700 | 2015-11-16 01:22:45 |
2334 | 666058600524156928 | 112 | 57 | 6997190 | 134700 | 2015-11-16 01:01:59 |
2335 | 666057090499244032 | 298 | 142 | 6997190 | 134700 | 2015-11-16 00:55:59 |
2336 | 666055525042405380 | 434 | 252 | 6997190 | 134700 | 2015-11-16 00:49:46 |
2337 | 666051853826850816 | 1223 | 853 | 6997190 | 134700 | 2015-11-16 00:35:11 |
2338 | 666050758794694657 | 132 | 58 | 6997190 | 134700 | 2015-11-16 00:30:50 |
2339 | 666049248165822465 | 109 | 41 | 6997190 | 134700 | 2015-11-16 00:24:50 |
2340 | 666044226329800704 | 299 | 141 | 6997190 | 134700 | 2015-11-16 00:04:52 |
2341 | 666033412701032449 | 125 | 44 | 6997190 | 134700 | 2015-11-15 23:21:54 |
2342 | 666029285002620928 | 129 | 47 | 6997190 | 134700 | 2015-11-15 23:05:30 |
2343 | 666020888022790149 | 2560 | 517 | 6997190 | 134700 | 2015-11-15 22:32:08 |
2344 rows × 6 columns
# Random spot check: print one arbitrary cell from two columns of each of the
# three data sources (archive, image predictions, tweet data).
spot_checks = (
    (archive_df, 'text', 100),            # archive dataframe
    (archive_df, 'expanded_urls', 200),
    (images_df, 'p1', 50),                # image dataframe
    (images_df, 'p2', 150),
    (tweet_df, 'tweet_id', 50),           # tweet data dataframe
    (tweet_df, 'date_time', 150),
)
for frame, column, row in spot_checks:
    print(frame[column][row])
Here are my favorite #dogsatpollingstations Most voted for a more consistent walking schedule and to increase daily pats tenfold. All 13/10 https://t.co/17FVMl4VZ5 https://twitter.com/dog_rates/status/854010172552949760/photo/1,https://twitter.com/dog_rates/status/854010172552949760/photo/1 triceratops pug 882268110199369728 2017-05-10 00:08:34
# Assess archive data programmatically.
# info() prints its report itself. The intermediate summaries are wrapped in
# print() because a notebook cell only renders the value of its final
# expression; as bare expressions they were computed and then discarded.
archive_df.info()
print(archive_df.describe())
print(archive_df['rating_numerator'].value_counts())
print(archive_df['rating_denominator'].value_counts())
# Last expression of the cell: rendered by the notebook as before.
archive_df['name'].value_counts()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 2356 entries, 0 to 2355 Data columns (total 17 columns): tweet_id 2356 non-null int64 in_reply_to_status_id 78 non-null float64 in_reply_to_user_id 78 non-null float64 timestamp 2356 non-null object source 2356 non-null object text 2356 non-null object retweeted_status_id 181 non-null float64 retweeted_status_user_id 181 non-null float64 retweeted_status_timestamp 181 non-null object expanded_urls 2297 non-null object rating_numerator 2356 non-null int64 rating_denominator 2356 non-null int64 name 2356 non-null object doggo 2356 non-null object floofer 2356 non-null object pupper 2356 non-null object puppo 2356 non-null object dtypes: float64(4), int64(3), object(10) memory usage: 313.0+ KB
None 745 a 55 Charlie 12 Cooper 11 Lucy 11 Oliver 11 Tucker 10 Lola 10 Penny 10 Winston 9 Bo 9 Sadie 8 the 8 Bailey 7 Toby 7 Daisy 7 an 7 Buddy 7 Leo 6 Jax 6 Rusty 6 Milo 6 Jack 6 Dave 6 Oscar 6 Koda 6 Bella 6 Scout 6 Stanley 6 Larry 5 ... Wiggles 1 Comet 1 Monkey 1 Millie 1 Shikha 1 Linus 1 Schnitzel 1 Burt 1 Willy 1 Napolean 1 Ronduh 1 Miguel 1 Karma 1 Skittles 1 Jerome 1 Jennifur 1 Flash 1 Danny 1 Ralphson 1 Alejandro 1 Jomathan 1 Fiji 1 Livvie 1 Kaiya 1 Michelangelope 1 Cedrick 1 his 1 Ed 1 Brandi 1 Rumble 1 Name: name, Length: 957, dtype: int64
# Assess image data programmatically.
# info() prints its report itself. The intermediate summaries are wrapped in
# print() because a notebook cell only renders the value of its final
# expression; as bare expressions they were computed and then discarded.
images_df.info()
print(images_df.describe())
print(images_df['tweet_id'].value_counts())
print(images_df['jpg_url'].value_counts())
# Last expression of the cell: rendered by the notebook as before.
images_df['p1'].value_counts()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 2075 entries, 0 to 2074 Data columns (total 12 columns): tweet_id 2075 non-null int64 jpg_url 2075 non-null object img_num 2075 non-null int64 p1 2075 non-null object p1_conf 2075 non-null float64 p1_dog 2075 non-null bool p2 2075 non-null object p2_conf 2075 non-null float64 p2_dog 2075 non-null bool p3 2075 non-null object p3_conf 2075 non-null float64 p3_dog 2075 non-null bool dtypes: bool(3), float64(3), int64(2), object(4) memory usage: 152.1+ KB
golden_retriever 150 Labrador_retriever 100 Pembroke 89 Chihuahua 83 pug 57 chow 44 Samoyed 43 toy_poodle 39 Pomeranian 38 malamute 30 cocker_spaniel 30 French_bulldog 26 Chesapeake_Bay_retriever 23 miniature_pinscher 23 seat_belt 22 Staffordshire_bullterrier 20 German_shepherd 20 Siberian_husky 20 Cardigan 19 web_site 19 Eskimo_dog 18 teddy 18 Maltese_dog 18 Shetland_sheepdog 18 beagle 18 Rottweiler 17 Shih-Tzu 17 Lakeland_terrier 17 kuvasz 16 Italian_greyhound 16 ... alp 1 lacewing 1 water_bottle 1 flamingo 1 piggy_bank 1 wild_boar 1 lorikeet 1 boathouse 1 stove 1 ibex 1 hummingbird 1 four-poster 1 Scotch_terrier 1 hotdog 1 prayer_rug 1 rapeseed 1 guenon 1 electric_fan 1 hammer 1 teapot 1 military_uniform 1 scorpion 1 African_crocodile 1 sea_urchin 1 pitcher 1 lynx 1 envelope 1 banana 1 standard_schnauzer 1 ice_lolly 1 Name: p1, Length: 378, dtype: int64
# Assess tweet data programmatically.
# info() prints its report itself. The intermediate summaries are wrapped in
# print() because a notebook cell only renders the value of its final
# expression; as bare expressions they were computed and then discarded.
tweet_df.info()
print(tweet_df.describe())
print(tweet_df['tweet_id'].value_counts())
print(tweet_df['user_followers'].value_counts())
# Last expression of the cell: rendered by the notebook as before.
tweet_df['user_favourites'].value_counts()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 2344 entries, 0 to 2343 Data columns (total 6 columns): tweet_id 2344 non-null int64 favorites 2344 non-null int64 retweets 2344 non-null int64 user_followers 2344 non-null int64 user_favourites 2344 non-null int64 date_time 2344 non-null object dtypes: int64(5), object(1) memory usage: 110.0+ KB
134700 2326 134701 18 Name: user_favourites, dtype: int64
Assessment section ends..
Cleaning section begins
Create a copy of each dataframe before the actual cleaning process.
# Snapshot each of the three dataframes before cleaning starts.
archive_df_copy = archive_df.copy()
images_df_copy = images_df.copy()
tweet_df_copy = tweet_df.copy()
# Verify the copies. The first two previews are printed explicitly because a
# notebook cell only renders the value of its final expression; as bare
# expressions they were computed and then discarded.
print(archive_df_copy.head())
print(images_df_copy.head())
# Last expression of the cell: rendered by the notebook as before.
tweet_df_copy.head()
tweet_id | favorites | retweets | user_followers | user_favourites | date_time | |
---|---|---|---|---|---|---|
0 | 892420643555336193 | 38680 | 8559 | 6997101 | 134700 | 2017-08-01 16:23:56 |
1 | 892177421306343426 | 33156 | 6291 | 6997101 | 134700 | 2017-08-01 00:17:27 |
2 | 891815181378084864 | 24958 | 4170 | 6997101 | 134700 | 2017-07-31 00:18:03 |
3 | 891689557279858688 | 42064 | 8687 | 6997101 | 134700 | 2017-07-30 15:58:51 |
4 | 891327558926688256 | 40212 | 9447 | 6997101 | 134700 | 2017-07-29 16:00:24 |
data copying ends....
Define:¶
Merge the 3 data sources into a single dataframe and then save it to a CSV file. Use tweet_id as the merge key. This is a tidiness issue.
# Code
# Inner-join archive, image predictions and tweet data on tweet_id, then
# write the combined dataframe to CSV.
df_master = (
    archive_df
    .merge(images_df, how='inner', on=['tweet_id'])
    .merge(tweet_df, how='inner', on=['tweet_id'])
)
df_master.to_csv('df_master.csv', encoding='utf-8')
# Test
df_master.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 2068 entries, 0 to 2067 Data columns (total 33 columns): tweet_id 2068 non-null int64 in_reply_to_status_id 23 non-null float64 in_reply_to_user_id 23 non-null float64 timestamp 2068 non-null object source 2068 non-null object text 2068 non-null object retweeted_status_id 75 non-null float64 retweeted_status_user_id 75 non-null float64 retweeted_status_timestamp 75 non-null object expanded_urls 2068 non-null object rating_numerator 2068 non-null int64 rating_denominator 2068 non-null int64 name 2068 non-null object doggo 2068 non-null object floofer 2068 non-null object pupper 2068 non-null object puppo 2068 non-null object jpg_url 2068 non-null object img_num 2068 non-null int64 p1 2068 non-null object p1_conf 2068 non-null float64 p1_dog 2068 non-null bool p2 2068 non-null object p2_conf 2068 non-null float64 p2_dog 2068 non-null bool p3 2068 non-null object p3_conf 2068 non-null float64 p3_dog 2068 non-null bool favorites 2068 non-null int64 retweets 2068 non-null int64 user_followers 2068 non-null int64 user_favourites 2068 non-null int64 date_time 2068 non-null object dtypes: bool(3), float64(7), int64(8), object(15) memory usage: 506.9+ KB
# Test
# Number of rows before duplicates are deleted.
df_master.shape[0]
2068
Define¶
Delete Duplicate rows.
# Code
# Keep only the first occurrence of every fully identical row.
df_master = df_master[~df_master.duplicated()]
# Test
# List every row that still has a duplicate; zero rows means none remain.
df_master.loc[df_master.duplicated(keep=False)]
tweet_id | in_reply_to_status_id | in_reply_to_user_id | timestamp | source | text | retweeted_status_id | retweeted_status_user_id | retweeted_status_timestamp | expanded_urls | ... | p2_conf | p2_dog | p3 | p3_conf | p3_dog | favorites | retweets | user_followers | user_favourites | date_time |
---|
0 rows × 33 columns
Define¶
Delete retweet rows: remove every row where retweeted_status_id or retweeted_status_user_id is not NaN.
# Code
# Keep only original tweets, i.e. rows whose retweeted_status_id and
# retweeted_status_user_id are both missing.
# isna() tests for missing values directly. The previous isin(['NaN']) only
# matched when pandas coerced the string 'NaN' to a float NaN, which is not
# guaranteed across pandas versions.
some_values = ['NaN']  # no longer used here; kept in case a later cell references it
is_original_tweet = (
    df_master['retweeted_status_id'].isna()
    & df_master['retweeted_status_user_id'].isna()
)
df_master = df_master.loc[is_original_tweet]
# Test
# Check whether the retweet rows were filtered out.
df_master
tweet_id | in_reply_to_status_id | in_reply_to_user_id | timestamp | source | text | retweeted_status_id | retweeted_status_user_id | retweeted_status_timestamp | expanded_urls | ... | p2_conf | p2_dog | p3 | p3_conf | p3_dog | favorites | retweets | user_followers | user_favourites | date_time | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 892420643555336193 | NaN | NaN | 2017-08-01 16:23:56 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Phineas. He's a mystical boy. Only eve... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/892420643... | ... | 0.085851 | False | banana | 0.076110 | False | 38680 | 8559 | 6997101 | 134700 | 2017-08-01 16:23:56 |
1 | 892177421306343426 | NaN | NaN | 2017-08-01 00:17:27 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Tilly. She's just checking pup on you.... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/892177421... | ... | 0.090647 | True | papillon | 0.068957 | True | 33156 | 6291 | 6997101 | 134700 | 2017-08-01 00:17:27 |
2 | 891815181378084864 | NaN | NaN | 2017-07-31 00:18:03 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Archie. He is a rare Norwegian Pouncin... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/891815181... | ... | 0.078253 | True | kelpie | 0.031379 | True | 24958 | 4170 | 6997101 | 134700 | 2017-07-31 00:18:03 |
3 | 891689557279858688 | NaN | NaN | 2017-07-30 15:58:51 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Darla. She commenced a snooze mid meal... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/891689557... | ... | 0.168086 | True | spatula | 0.040836 | False | 42064 | 8687 | 6997101 | 134700 | 2017-07-30 15:58:51 |
4 | 891327558926688256 | NaN | NaN | 2017-07-29 16:00:24 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Franklin. He would like you to stop ca... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/891327558... | ... | 0.225770 | True | German_short-haired_pointer | 0.175219 | True | 40212 | 9447 | 6997101 | 134700 | 2017-07-29 16:00:24 |
5 | 891087950875897856 | NaN | NaN | 2017-07-29 00:08:17 +0000 | <a href="http://twitter.com/download/iphone" r... | Here we have a majestic great white breaching ... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/891087950... | ... | 0.116317 | True | Indian_elephant | 0.076902 | False | 20163 | 3127 | 6997101 | 134700 | 2017-07-29 00:08:17 |
6 | 890971913173991426 | NaN | NaN | 2017-07-28 16:27:12 +0000 | <a href="http://twitter.com/download/iphone" r... | Meet Jax. He enjoys ice cream so much he gets ... | NaN | NaN | NaN | https://gofundme.com/ydvmve-surgery-for-jax,ht... | ... | 0.199287 | True | ice_lolly | 0.193548 | False | 11816 | 2082 | 6997101 | 134700 | 2017-07-28 16:27:12 |
7 | 890729181411237888 | NaN | NaN | 2017-07-28 00:22:40 +0000 | <a href="http://twitter.com/download/iphone" r... | When you watch your owner call another dog a g... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/890729181... | ... | 0.178406 | True | Pembroke | 0.076507 | True | 65338 | 18971 | 6997101 | 134700 | 2017-07-28 00:22:40 |
8 | 890609185150312448 | NaN | NaN | 2017-07-27 16:25:51 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Zoey. She doesn't want to be one of th... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/890609185... | ... | 0.193054 | True | Chesapeake_Bay_retriever | 0.118184 | True | 27713 | 4279 | 6997101 | 134700 | 2017-07-27 16:25:51 |
9 | 890240255349198849 | NaN | NaN | 2017-07-26 15:59:51 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Cassie. She is a college pup. Studying... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/890240255... | ... | 0.451038 | True | Chihuahua | 0.029248 | True | 31857 | 7448 | 6997101 | 134700 | 2017-07-26 15:59:51 |
10 | 890006608113172480 | NaN | NaN | 2017-07-26 00:31:25 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Koda. He is a South Australian decksha... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/890006608... | ... | 0.013884 | True | chow | 0.008167 | True | 30583 | 7363 | 6997101 | 134700 | 2017-07-26 00:31:25 |
11 | 889880896479866881 | NaN | NaN | 2017-07-25 16:11:53 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Bruno. He is a service shark. Only get... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/889880896... | ... | 0.151317 | True | muzzle | 0.082981 | False | 27719 | 4989 | 6997101 | 134700 | 2017-07-25 16:11:53 |
12 | 889665388333682689 | NaN | NaN | 2017-07-25 01:55:32 +0000 | <a href="http://twitter.com/download/iphone" r... | Here's a puppo that seems to be on the fence a... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/889665388... | ... | 0.027356 | True | basenji | 0.004633 | True | 48003 | 10101 | 6997101 | 134700 | 2017-07-25 01:55:32 |
13 | 889638837579907072 | NaN | NaN | 2017-07-25 00:10:02 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Ted. He does his best. Sometimes that'... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/889638837... | ... | 0.002129 | True | Staffordshire_bullterrier | 0.001498 | True | 27103 | 4564 | 6997101 | 134700 | 2017-07-25 00:10:02 |
14 | 889531135344209921 | NaN | NaN | 2017-07-24 17:02:04 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Stuart. He's sporting his favorite fan... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/889531135... | ... | 0.013834 | True | redbone | 0.007958 | True | 15052 | 2244 | 6997101 | 134700 | 2017-07-24 17:02:04 |
15 | 889278841981685760 | NaN | NaN | 2017-07-24 00:19:32 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Oliver. You're witnessing one of his m... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/889278841... | ... | 0.194742 | True | Saluki | 0.027351 | True | 25236 | 5446 | 6997101 | 134700 | 2017-07-24 00:19:32 |
16 | 888917238123831296 | NaN | NaN | 2017-07-23 00:22:39 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Jim. He found a fren. Taught him how t... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/888917238... | ... | 0.120184 | True | Labrador_retriever | 0.105506 | True | 28996 | 4517 | 6997101 | 134700 | 2017-07-23 00:22:39 |
17 | 888804989199671297 | NaN | NaN | 2017-07-22 16:56:37 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Zeke. He has a new stick. Very proud o... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/888804989... | ... | 0.184172 | True | English_setter | 0.073482 | True | 25527 | 4363 | 6997101 | 134700 | 2017-07-22 16:56:37 |
18 | 888554962724278272 | NaN | NaN | 2017-07-22 00:23:06 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Ralphus. He's powering up. Attempting ... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/888554962... | ... | 0.166511 | True | malamute | 0.111411 | True | 19850 | 3596 | 6997101 | 134700 | 2017-07-22 00:23:06 |
19 | 888078434458587136 | NaN | NaN | 2017-07-20 16:49:33 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Gerald. He was just told he didn't get... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/888078434... | ... | 0.000932 | True | bull_mastiff | 0.000903 | True | 21702 | 3511 | 6997101 | 134700 | 2017-07-20 16:49:33 |
20 | 887705289381826560 | NaN | NaN | 2017-07-19 16:06:48 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Jeffrey. He has a monopoly on the pool... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/887705289... | ... | 0.087582 | True | Weimaraner | 0.026236 | True | 30108 | 5414 | 6997101 | 134700 | 2017-07-19 16:06:48 |
21 | 887517139158093824 | NaN | NaN | 2017-07-19 03:39:09 +0000 | <a href="http://twitter.com/download/iphone" r... | I've yet to rate a Venezuelan Hover Wiener. Th... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/887517139... | ... | 0.029175 | False | shopping_cart | 0.026321 | False | 46119 | 11713 | 6997101 | 134700 | 2017-07-19 03:39:09 |
22 | 887473957103951883 | NaN | NaN | 2017-07-19 00:47:34 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Canela. She attempted some fancy porch... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/887473957... | ... | 0.054950 | True | beagle | 0.038915 | True | 68935 | 18305 | 6997101 | 134700 | 2017-07-19 00:47:34 |
23 | 887343217045368832 | NaN | NaN | 2017-07-18 16:08:03 +0000 | <a href="http://twitter.com/download/iphone" r... | You may not have known you needed to see this ... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/887343217... | ... | 0.275645 | False | Weimaraner | 0.134203 | True | 33582 | 10446 | 6997101 | 134700 | 2017-07-18 16:08:03 |
24 | 887101392804085760 | NaN | NaN | 2017-07-18 00:07:08 +0000 | <a href="http://twitter.com/download/iphone" r... | This... is a Jubilant Antarctic House Bear. We... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/887101392... | ... | 0.035029 | True | Staffordshire_bullterrier | 0.029705 | True | 30451 | 5984 | 6997101 | 134700 | 2017-07-18 00:07:08 |
25 | 886983233522544640 | NaN | NaN | 2017-07-17 16:17:36 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Maya. She's very shy. Rarely leaves he... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/886983233... | ... | 0.143528 | True | can_opener | 0.032253 | False | 35058 | 7806 | 6997101 | 134700 | 2017-07-17 16:17:36 |
26 | 886736880519319552 | NaN | NaN | 2017-07-16 23:58:41 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Mingus. He's a wonderful father to his... | NaN | NaN | NaN | https://www.gofundme.com/mingusneedsus,https:/... | ... | 0.186136 | True | Dandie_Dinmont | 0.086346 | True | 12037 | 3311 | 6997101 | 134700 | 2017-07-16 23:58:41 |
27 | 886680336477933568 | NaN | NaN | 2017-07-16 20:14:00 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Derek. He's late for a dog meeting. 13... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/886680336... | ... | 0.139952 | False | car_wheel | 0.044173 | False | 22362 | 4486 | 6997101 | 134700 | 2017-07-16 20:14:00 |
28 | 886366144734445568 | NaN | NaN | 2017-07-15 23:25:31 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Roscoe. Another pupper fallen victim t... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/886366144... | ... | 0.000361 | True | Boston_bull | 0.000076 | True | 21142 | 3208 | 6997102 | 134700 | 2017-07-15 23:25:31 |
29 | 886258384151887873 | NaN | NaN | 2017-07-15 16:17:19 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Waffles. His doggles are pupside down.... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/886258384... | ... | 0.025286 | False | Siamese_cat | 0.002849 | False | 27905 | 6316 | 6997102 | 134700 | 2017-07-15 16:17:19 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
2038 | 666411507551481857 | NaN | NaN | 2015-11-17 00:24:19 +0000 | <a href="http://twitter.com/download/iphone" r... | This is quite the dog. Gets really excited whe... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/666411507... | ... | 0.271485 | False | gar | 0.189945 | False | 447 | 328 | 6997189 | 134700 | 2015-11-17 00:24:19 |
2039 | 666407126856765440 | NaN | NaN | 2015-11-17 00:06:54 +0000 | <a href="http://twitter.com/download/iphone" r... | This is a southern Vesuvius bumblegruff. Can d... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/666407126... | ... | 0.244220 | True | flat-coated_retriever | 0.173810 | True | 110 | 41 | 6997189 | 134700 | 2015-11-17 00:06:54 |
2040 | 666396247373291520 | NaN | NaN | 2015-11-16 23:23:41 +0000 | <a href="http://twitter.com/download/iphone" r... | Oh goodness. A super rare northeast Qdoba kang... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/666396247... | ... | 0.009397 | True | papillon | 0.004577 | True | 167 | 86 | 6997189 | 134700 | 2015-11-16 23:23:41 |
2041 | 666373753744588802 | NaN | NaN | 2015-11-16 21:54:18 +0000 | <a href="http://twitter.com/download/iphone" r... | Those are sunglasses and a jean jacket. 11/10 ... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/666373753... | ... | 0.259551 | True | briard | 0.206803 | True | 190 | 93 | 6997189 | 134700 | 2015-11-16 21:54:18 |
2042 | 666362758909284353 | NaN | NaN | 2015-11-16 21:10:36 +0000 | <a href="http://twitter.com/download/iphone" r... | Unique dog here. Very small. Lives in containe... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/666362758... | ... | 0.002402 | False | hamster | 0.000461 | False | 778 | 574 | 6997189 | 134700 | 2015-11-16 21:10:36 |
2043 | 666353288456101888 | NaN | NaN | 2015-11-16 20:32:58 +0000 | <a href="http://twitter.com/download/iphone" r... | Here we have a mixed Asiago from the GalƔpagos... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/666353288... | ... | 0.147655 | True | Eskimo_dog | 0.093412 | True | 221 | 73 | 6997190 | 134700 | 2015-11-16 20:32:58 |
2044 | 666345417576210432 | NaN | NaN | 2015-11-16 20:01:42 +0000 | <a href="http://twitter.com/download/iphone" r... | Look at this jokester thinking seat belt laws ... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/666345417... | ... | 0.054787 | True | Labrador_retriever | 0.014241 | True | 299 | 139 | 6997190 | 134700 | 2015-11-16 20:01:42 |
2045 | 666337882303524864 | NaN | NaN | 2015-11-16 19:31:45 +0000 | <a href="http://twitter.com/download/iphone" r... | This is an extremely rare horned Parthenon. No... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/666337882... | ... | 0.278407 | True | groenendael | 0.102643 | True | 198 | 92 | 6997190 | 134700 | 2015-11-16 19:31:45 |
2046 | 666293911632134144 | NaN | NaN | 2015-11-16 16:37:02 +0000 | <a href="http://twitter.com/download/iphone" r... | This is a funny dog. Weird toes. Won't come do... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/666293911... | ... | 0.015250 | False | great_grey_owl | 0.013207 | False | 509 | 357 | 6997190 | 134700 | 2015-11-16 16:37:02 |
2047 | 666287406224695296 | NaN | NaN | 2015-11-16 16:11:11 +0000 | <a href="http://twitter.com/download/iphone" r... | This is an Albanian 3 1/2 legged Episcopalian... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/666287406... | ... | 0.063064 | True | miniature_poodle | 0.025581 | True | 149 | 66 | 6997190 | 134700 | 2015-11-16 16:11:11 |
2048 | 666273097616637952 | NaN | NaN | 2015-11-16 15:14:19 +0000 | <a href="http://twitter.com/download/iphone" r... | Can take selfies 11/10 https://t.co/ws2AMaNwPW | NaN | NaN | NaN | https://twitter.com/dog_rates/status/666273097... | ... | 0.111884 | True | basenji | 0.111152 | True | 176 | 76 | 6997190 | 134700 | 2015-11-16 15:14:19 |
2049 | 666268910803644416 | NaN | NaN | 2015-11-16 14:57:41 +0000 | <a href="http://twitter.com/download/iphone" r... | Very concerned about fellow dog trapped in com... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/666268910... | ... | 0.085547 | False | bookcase | 0.079480 | False | 104 | 35 | 6997190 | 134700 | 2015-11-16 14:57:41 |
2050 | 666104133288665088 | NaN | NaN | 2015-11-16 04:02:55 +0000 | <a href="http://twitter.com/download/iphone" r... | Not familiar with this breed. No tail (weird).... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/666104133... | ... | 0.033919 | False | partridge | 0.000052 | False | 14346 | 6634 | 6997190 | 134700 | 2015-11-16 04:02:55 |
2051 | 666102155909144576 | NaN | NaN | 2015-11-16 03:55:04 +0000 | <a href="http://twitter.com/download/iphone" r... | Oh my. Here you are seeing an Adobe Setter giv... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/666102155... | ... | 0.149842 | True | borzoi | 0.133649 | True | 80 | 13 | 6997190 | 134700 | 2015-11-16 03:55:04 |
2052 | 666099513787052032 | NaN | NaN | 2015-11-16 03:44:34 +0000 | <a href="http://twitter.com/download/iphone" r... | Can stand on stump for what seems like a while... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/666099513... | ... | 0.166192 | True | Dandie_Dinmont | 0.089688 | True | 156 | 68 | 6997190 | 134700 | 2015-11-16 03:44:34 |
2053 | 666094000022159362 | NaN | NaN | 2015-11-16 03:22:39 +0000 | <a href="http://twitter.com/download/iphone" r... | This appears to be a Mongolian Presbyterian mi... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/666094000... | ... | 0.078260 | True | malinois | 0.075628 | True | 164 | 74 | 6997190 | 134700 | 2015-11-16 03:22:39 |
2054 | 666082916733198337 | NaN | NaN | 2015-11-16 02:38:37 +0000 | <a href="http://twitter.com/download/iphone" r... | Here we have a well-established sunblockerspan... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/666082916... | ... | 0.404722 | True | French_bulldog | 0.048960 | True | 119 | 45 | 6997190 | 134700 | 2015-11-16 02:38:37 |
2055 | 666073100786774016 | NaN | NaN | 2015-11-16 01:59:36 +0000 | <a href="http://twitter.com/download/iphone" r... | Let's hope this flight isn't Malaysian (lol). ... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/666073100... | ... | 0.175382 | True | Ibizan_hound | 0.097471 | True | 322 | 164 | 6997190 | 134700 | 2015-11-16 01:59:36 |
2056 | 666071193221509120 | NaN | NaN | 2015-11-16 01:52:02 +0000 | <a href="http://twitter.com/download/iphone" r... | Here we have a northern speckled Rhododendron.... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/666071193... | ... | 0.174201 | True | Pekinese | 0.109454 | True | 148 | 62 | 6997190 | 134700 | 2015-11-16 01:52:02 |
2057 | 666063827256086533 | NaN | NaN | 2015-11-16 01:22:45 +0000 | <a href="http://twitter.com/download/iphone" r... | This is the happiest dog you will ever see. Ve... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/666063827... | ... | 0.093718 | True | Labrador_retriever | 0.072427 | True | 476 | 219 | 6997190 | 134700 | 2015-11-16 01:22:45 |
2058 | 666058600524156928 | NaN | NaN | 2015-11-16 01:01:59 +0000 | <a href="http://twitter.com/download/iphone" r... | Here is the Rand Paul of retrievers folks! He'... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/666058600... | ... | 0.192305 | True | soft-coated_wheaten_terrier | 0.082086 | True | 112 | 57 | 6997190 | 134700 | 2015-11-16 01:01:59 |
2059 | 666057090499244032 | NaN | NaN | 2015-11-16 00:55:59 +0000 | <a href="http://twitter.com/download/iphone" r... | My oh my. This is a rare blond Canadian terrie... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/666057090... | ... | 0.014594 | False | golden_retriever | 0.007959 | True | 298 | 142 | 6997190 | 134700 | 2015-11-16 00:55:59 |
2060 | 666055525042405380 | NaN | NaN | 2015-11-16 00:49:46 +0000 | <a href="http://twitter.com/download/iphone" r... | Here is a Siberian heavily armored polar bear ... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/666055525... | ... | 0.058279 | True | fur_coat | 0.054449 | False | 434 | 252 | 6997190 | 134700 | 2015-11-16 00:49:46 |
2061 | 666051853826850816 | NaN | NaN | 2015-11-16 00:35:11 +0000 | <a href="http://twitter.com/download/iphone" r... | This is an odd dog. Hard on the outside but lo... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/666051853... | ... | 0.045885 | False | terrapin | 0.017885 | False | 1223 | 853 | 6997190 | 134700 | 2015-11-16 00:35:11 |
2062 | 666050758794694657 | NaN | NaN | 2015-11-16 00:30:50 +0000 | <a href="http://twitter.com/download/iphone" r... | This is a truly beautiful English Wilson Staff... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/666050758... | ... | 0.263788 | True | Greater_Swiss_Mountain_dog | 0.016199 | True | 132 | 58 | 6997190 | 134700 | 2015-11-16 00:30:50 |
2063 | 666049248165822465 | NaN | NaN | 2015-11-16 00:24:50 +0000 | <a href="http://twitter.com/download/iphone" r... | Here we have a 1949 1st generation vulpix. Enj... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/666049248... | ... | 0.243682 | True | Doberman | 0.154629 | True | 109 | 41 | 6997190 | 134700 | 2015-11-16 00:24:50 |
2064 | 666044226329800704 | NaN | NaN | 2015-11-16 00:04:52 +0000 | <a href="http://twitter.com/download/iphone" r... | This is a purebred Piers Morgan. Loves to Netf... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/666044226... | ... | 0.360687 | True | miniature_pinscher | 0.222752 | True | 299 | 141 | 6997190 | 134700 | 2015-11-16 00:04:52 |
2065 | 666033412701032449 | NaN | NaN | 2015-11-15 23:21:54 +0000 | <a href="http://twitter.com/download/iphone" r... | Here is a very happy pup. Big fan of well-main... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/666033412... | ... | 0.138584 | True | bloodhound | 0.116197 | True | 125 | 44 | 6997190 | 134700 | 2015-11-15 23:21:54 |
2066 | 666029285002620928 | NaN | NaN | 2015-11-15 23:05:30 +0000 | <a href="http://twitter.com/download/iphone" r... | This is a western brown Mitsubishi terrier. Up... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/666029285... | ... | 0.074192 | True | Rhodesian_ridgeback | 0.072010 | True | 129 | 47 | 6997190 | 134700 | 2015-11-15 23:05:30 |
2067 | 666020888022790149 | NaN | NaN | 2015-11-15 22:32:08 +0000 | <a href="http://twitter.com/download/iphone" r... | Here we have a Japanese Irish Setter. Lost eye... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/666020888... | ... | 0.156665 | True | Shetland_sheepdog | 0.061428 | True | 2560 | 517 | 6997190 | 134700 | 2015-11-15 22:32:08 |
1993 rows × 33 columns
Define¶
Quality issue #3: Delete useless columns, as most of their values are NaN.
#code
# Drop the reply/retweet columns that the Define note marks as mostly NaN.
# `axis` is passed by keyword: pandas 2.0 made every DataFrame.drop argument
# except `labels` keyword-only, so the old drop(label, 1) form raises TypeError.
mostly_nan_columns = ['in_reply_to_status_id', 'in_reply_to_user_id',
                      'retweeted_status_id', 'retweeted_status_timestamp']
df_master = df_master.drop(mostly_nan_columns, axis=1)
#Test
# The four columns above should no longer appear in the summary.
df_master.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 1993 entries, 0 to 2067 Data columns (total 29 columns): tweet_id 1993 non-null int64 timestamp 1993 non-null object source 1993 non-null object text 1993 non-null object retweeted_status_user_id 0 non-null float64 expanded_urls 1993 non-null object rating_numerator 1993 non-null int64 rating_denominator 1993 non-null int64 name 1993 non-null object doggo 1993 non-null object floofer 1993 non-null object pupper 1993 non-null object puppo 1993 non-null object jpg_url 1993 non-null object img_num 1993 non-null int64 p1 1993 non-null object p1_conf 1993 non-null float64 p1_dog 1993 non-null bool p2 1993 non-null object p2_conf 1993 non-null float64 p2_dog 1993 non-null bool p3 1993 non-null object p3_conf 1993 non-null float64 p3_dog 1993 non-null bool favorites 1993 non-null int64 retweets 1993 non-null int64 user_followers 1993 non-null int64 user_favourites 1993 non-null int64 date_time 1993 non-null object dtypes: bool(3), float64(4), int64(8), object(14) memory usage: 426.2+ KB
Define¶
Data quality issue #4: Delete date_time because it duplicates timestamp; the same value comes from two data sources.
#code
# Remove the duplicate date_time column.
# `axis` is passed by keyword: pandas 2.0 made every DataFrame.drop argument
# except `labels` keyword-only, so the old drop(label, 1) form raises TypeError.
df_master = df_master.drop('date_time', axis=1)
#test
# date_time should no longer appear in the summary.
df_master.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 1993 entries, 0 to 2067 Data columns (total 28 columns): tweet_id 1993 non-null int64 timestamp 1993 non-null object source 1993 non-null object text 1993 non-null object retweeted_status_user_id 0 non-null float64 expanded_urls 1993 non-null object rating_numerator 1993 non-null int64 rating_denominator 1993 non-null int64 name 1993 non-null object doggo 1993 non-null object floofer 1993 non-null object pupper 1993 non-null object puppo 1993 non-null object jpg_url 1993 non-null object img_num 1993 non-null int64 p1 1993 non-null object p1_conf 1993 non-null float64 p1_dog 1993 non-null bool p2 1993 non-null object p2_conf 1993 non-null float64 p2_dog 1993 non-null bool p3 1993 non-null object p3_conf 1993 non-null float64 p3_dog 1993 non-null bool favorites 1993 non-null int64 retweets 1993 non-null int64 user_followers 1993 non-null int64 user_favourites 1993 non-null int64 dtypes: bool(3), float64(4), int64(8), object(13) memory usage: 410.7+ KB
#code
# Raise every rating_denominator that is below 10 up to 10, in place.
too_small = df_master['rating_denominator'] < 10
df_master.loc[too_small, 'rating_denominator'] = 10
#Test
# An empty result (0 rows) confirms that no denominator below 10 remains.
df_master[df_master['rating_denominator'] < 10]
tweet_id | timestamp | source | text | retweeted_status_user_id | expanded_urls | rating_numerator | rating_denominator | name | doggo | ... | p2 | p2_conf | p2_dog | p3 | p3_conf | p3_dog | favorites | retweets | user_followers | user_favourites |
---|
0 rows × 28 columns
#Code
# Names written entirely in lowercase (e.g. 'a', 'an', 'the') are treated as
# invalid and overwritten with the 'None' placeholder string.
# .eq(True) keeps the mask strictly boolean even if islower() yields NaN.
is_lowercase_name = df_master['name'].str.islower().eq(True)
df_master.loc[is_lowercase_name, 'name'] = 'None'
#test
# Eyeball check: values such as 'a', 'an', 'the' should now read 'None'.
df_master
tweet_id | timestamp | source | text | retweeted_status_user_id | expanded_urls | rating_numerator | rating_denominator | name | doggo | ... | p2 | p2_conf | p2_dog | p3 | p3_conf | p3_dog | favorites | retweets | user_followers | user_favourites | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 892420643555336193 | 2017-08-01 16:23:56 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Phineas. He's a mystical boy. Only eve... | NaN | https://twitter.com/dog_rates/status/892420643... | 13 | 10 | Phineas | None | ... | bagel | 0.085851 | False | banana | 0.076110 | False | 38680 | 8559 | 6997101 | 134700 |
1 | 892177421306343426 | 2017-08-01 00:17:27 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Tilly. She's just checking pup on you.... | NaN | https://twitter.com/dog_rates/status/892177421... | 13 | 10 | Tilly | None | ... | Pekinese | 0.090647 | True | papillon | 0.068957 | True | 33156 | 6291 | 6997101 | 134700 |
2 | 891815181378084864 | 2017-07-31 00:18:03 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Archie. He is a rare Norwegian Pouncin... | NaN | https://twitter.com/dog_rates/status/891815181... | 12 | 10 | Archie | None | ... | malamute | 0.078253 | True | kelpie | 0.031379 | True | 24958 | 4170 | 6997101 | 134700 |
3 | 891689557279858688 | 2017-07-30 15:58:51 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Darla. She commenced a snooze mid meal... | NaN | https://twitter.com/dog_rates/status/891689557... | 13 | 10 | Darla | None | ... | Labrador_retriever | 0.168086 | True | spatula | 0.040836 | False | 42064 | 8687 | 6997101 | 134700 |
4 | 891327558926688256 | 2017-07-29 16:00:24 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Franklin. He would like you to stop ca... | NaN | https://twitter.com/dog_rates/status/891327558... | 12 | 10 | Franklin | None | ... | English_springer | 0.225770 | True | German_short-haired_pointer | 0.175219 | True | 40212 | 9447 | 6997101 | 134700 |
5 | 891087950875897856 | 2017-07-29 00:08:17 +0000 | <a href="http://twitter.com/download/iphone" r... | Here we have a majestic great white breaching ... | NaN | https://twitter.com/dog_rates/status/891087950... | 13 | 10 | None | None | ... | Irish_terrier | 0.116317 | True | Indian_elephant | 0.076902 | False | 20163 | 3127 | 6997101 | 134700 |
6 | 890971913173991426 | 2017-07-28 16:27:12 +0000 | <a href="http://twitter.com/download/iphone" r... | Meet Jax. He enjoys ice cream so much he gets ... | NaN | https://gofundme.com/ydvmve-surgery-for-jax,ht... | 13 | 10 | Jax | None | ... | Border_collie | 0.199287 | True | ice_lolly | 0.193548 | False | 11816 | 2082 | 6997101 | 134700 |
7 | 890729181411237888 | 2017-07-28 00:22:40 +0000 | <a href="http://twitter.com/download/iphone" r... | When you watch your owner call another dog a g... | NaN | https://twitter.com/dog_rates/status/890729181... | 13 | 10 | None | None | ... | Eskimo_dog | 0.178406 | True | Pembroke | 0.076507 | True | 65338 | 18971 | 6997101 | 134700 |
8 | 890609185150312448 | 2017-07-27 16:25:51 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Zoey. She doesn't want to be one of th... | NaN | https://twitter.com/dog_rates/status/890609185... | 13 | 10 | Zoey | None | ... | Irish_setter | 0.193054 | True | Chesapeake_Bay_retriever | 0.118184 | True | 27713 | 4279 | 6997101 | 134700 |
9 | 890240255349198849 | 2017-07-26 15:59:51 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Cassie. She is a college pup. Studying... | NaN | https://twitter.com/dog_rates/status/890240255... | 14 | 10 | Cassie | doggo | ... | Cardigan | 0.451038 | True | Chihuahua | 0.029248 | True | 31857 | 7448 | 6997101 | 134700 |
10 | 890006608113172480 | 2017-07-26 00:31:25 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Koda. He is a South Australian decksha... | NaN | https://twitter.com/dog_rates/status/890006608... | 13 | 10 | Koda | None | ... | Pomeranian | 0.013884 | True | chow | 0.008167 | True | 30583 | 7363 | 6997101 | 134700 |
11 | 889880896479866881 | 2017-07-25 16:11:53 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Bruno. He is a service shark. Only get... | NaN | https://twitter.com/dog_rates/status/889880896... | 13 | 10 | Bruno | None | ... | Labrador_retriever | 0.151317 | True | muzzle | 0.082981 | False | 27719 | 4989 | 6997101 | 134700 |
12 | 889665388333682689 | 2017-07-25 01:55:32 +0000 | <a href="http://twitter.com/download/iphone" r... | Here's a puppo that seems to be on the fence a... | NaN | https://twitter.com/dog_rates/status/889665388... | 13 | 10 | None | None | ... | Cardigan | 0.027356 | True | basenji | 0.004633 | True | 48003 | 10101 | 6997101 | 134700 |
13 | 889638837579907072 | 2017-07-25 00:10:02 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Ted. He does his best. Sometimes that'... | NaN | https://twitter.com/dog_rates/status/889638837... | 12 | 10 | Ted | None | ... | boxer | 0.002129 | True | Staffordshire_bullterrier | 0.001498 | True | 27103 | 4564 | 6997101 | 134700 |
14 | 889531135344209921 | 2017-07-24 17:02:04 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Stuart. He's sporting his favorite fan... | NaN | https://twitter.com/dog_rates/status/889531135... | 13 | 10 | Stuart | None | ... | Labrador_retriever | 0.013834 | True | redbone | 0.007958 | True | 15052 | 2244 | 6997101 | 134700 |
15 | 889278841981685760 | 2017-07-24 00:19:32 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Oliver. You're witnessing one of his m... | NaN | https://twitter.com/dog_rates/status/889278841... | 13 | 10 | Oliver | None | ... | borzoi | 0.194742 | True | Saluki | 0.027351 | True | 25236 | 5446 | 6997101 | 134700 |
16 | 888917238123831296 | 2017-07-23 00:22:39 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Jim. He found a fren. Taught him how t... | NaN | https://twitter.com/dog_rates/status/888917238... | 12 | 10 | Jim | None | ... | Tibetan_mastiff | 0.120184 | True | Labrador_retriever | 0.105506 | True | 28996 | 4517 | 6997101 | 134700 |
17 | 888804989199671297 | 2017-07-22 16:56:37 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Zeke. He has a new stick. Very proud o... | NaN | https://twitter.com/dog_rates/status/888804989... | 13 | 10 | Zeke | None | ... | Labrador_retriever | 0.184172 | True | English_setter | 0.073482 | True | 25527 | 4363 | 6997101 | 134700 |
18 | 888554962724278272 | 2017-07-22 00:23:06 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Ralphus. He's powering up. Attempting ... | NaN | https://twitter.com/dog_rates/status/888554962... | 13 | 10 | Ralphus | None | ... | Eskimo_dog | 0.166511 | True | malamute | 0.111411 | True | 19850 | 3596 | 6997101 | 134700 |
19 | 888078434458587136 | 2017-07-20 16:49:33 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Gerald. He was just told he didn't get... | NaN | https://twitter.com/dog_rates/status/888078434... | 12 | 10 | Gerald | None | ... | pug | 0.000932 | True | bull_mastiff | 0.000903 | True | 21702 | 3511 | 6997101 | 134700 |
20 | 887705289381826560 | 2017-07-19 16:06:48 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Jeffrey. He has a monopoly on the pool... | NaN | https://twitter.com/dog_rates/status/887705289... | 13 | 10 | Jeffrey | None | ... | redbone | 0.087582 | True | Weimaraner | 0.026236 | True | 30108 | 5414 | 6997101 | 134700 |
21 | 887517139158093824 | 2017-07-19 03:39:09 +0000 | <a href="http://twitter.com/download/iphone" r... | I've yet to rate a Venezuelan Hover Wiener. Th... | NaN | https://twitter.com/dog_rates/status/887517139... | 14 | 10 | None | None | ... | tow_truck | 0.029175 | False | shopping_cart | 0.026321 | False | 46119 | 11713 | 6997101 | 134700 |
22 | 887473957103951883 | 2017-07-19 00:47:34 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Canela. She attempted some fancy porch... | NaN | https://twitter.com/dog_rates/status/887473957... | 13 | 10 | Canela | None | ... | Rhodesian_ridgeback | 0.054950 | True | beagle | 0.038915 | True | 68935 | 18305 | 6997101 | 134700 |
23 | 887343217045368832 | 2017-07-18 16:08:03 +0000 | <a href="http://twitter.com/download/iphone" r... | You may not have known you needed to see this ... | NaN | https://twitter.com/dog_rates/status/887343217... | 13 | 10 | None | None | ... | sea_lion | 0.275645 | False | Weimaraner | 0.134203 | True | 33582 | 10446 | 6997101 | 134700 |
24 | 887101392804085760 | 2017-07-18 00:07:08 +0000 | <a href="http://twitter.com/download/iphone" r... | This... is a Jubilant Antarctic House Bear. We... | NaN | https://twitter.com/dog_rates/status/887101392... | 12 | 10 | None | None | ... | Eskimo_dog | 0.035029 | True | Staffordshire_bullterrier | 0.029705 | True | 30451 | 5984 | 6997101 | 134700 |
25 | 886983233522544640 | 2017-07-17 16:17:36 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Maya. She's very shy. Rarely leaves he... | NaN | https://twitter.com/dog_rates/status/886983233... | 13 | 10 | Maya | None | ... | toy_terrier | 0.143528 | True | can_opener | 0.032253 | False | 35058 | 7806 | 6997101 | 134700 |
26 | 886736880519319552 | 2017-07-16 23:58:41 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Mingus. He's a wonderful father to his... | NaN | https://www.gofundme.com/mingusneedsus,https:/... | 13 | 10 | Mingus | None | ... | Great_Pyrenees | 0.186136 | True | Dandie_Dinmont | 0.086346 | True | 12037 | 3311 | 6997101 | 134700 |
27 | 886680336477933568 | 2017-07-16 20:14:00 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Derek. He's late for a dog meeting. 13... | NaN | https://twitter.com/dog_rates/status/886680336... | 13 | 10 | Derek | None | ... | sports_car | 0.139952 | False | car_wheel | 0.044173 | False | 22362 | 4486 | 6997101 | 134700 |
28 | 886366144734445568 | 2017-07-15 23:25:31 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Roscoe. Another pupper fallen victim t... | NaN | https://twitter.com/dog_rates/status/886366144... | 12 | 10 | Roscoe | None | ... | Chihuahua | 0.000361 | True | Boston_bull | 0.000076 | True | 21142 | 3208 | 6997102 | 134700 |
29 | 886258384151887873 | 2017-07-15 16:17:19 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Waffles. His doggles are pupside down.... | NaN | https://twitter.com/dog_rates/status/886258384... | 13 | 10 | Waffles | None | ... | shower_cap | 0.025286 | False | Siamese_cat | 0.002849 | False | 27905 | 6316 | 6997102 | 134700 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
2038 | 666411507551481857 | 2015-11-17 00:24:19 +0000 | <a href="http://twitter.com/download/iphone" r... | This is quite the dog. Gets really excited whe... | NaN | https://twitter.com/dog_rates/status/666411507... | 2 | 10 | None | None | ... | barracouta | 0.271485 | False | gar | 0.189945 | False | 447 | 328 | 6997189 | 134700 |
2039 | 666407126856765440 | 2015-11-17 00:06:54 +0000 | <a href="http://twitter.com/download/iphone" r... | This is a southern Vesuvius bumblegruff. Can d... | NaN | https://twitter.com/dog_rates/status/666407126... | 7 | 10 | None | None | ... | bloodhound | 0.244220 | True | flat-coated_retriever | 0.173810 | True | 110 | 41 | 6997189 | 134700 |
2040 | 666396247373291520 | 2015-11-16 23:23:41 +0000 | <a href="http://twitter.com/download/iphone" r... | Oh goodness. A super rare northeast Qdoba kang... | NaN | https://twitter.com/dog_rates/status/666396247... | 9 | 10 | None | None | ... | toy_terrier | 0.009397 | True | papillon | 0.004577 | True | 167 | 86 | 6997189 | 134700 |
2041 | 666373753744588802 | 2015-11-16 21:54:18 +0000 | <a href="http://twitter.com/download/iphone" r... | Those are sunglasses and a jean jacket. 11/10 ... | NaN | https://twitter.com/dog_rates/status/666373753... | 11 | 10 | None | None | ... | Afghan_hound | 0.259551 | True | briard | 0.206803 | True | 190 | 93 | 6997189 | 134700 |
2042 | 666362758909284353 | 2015-11-16 21:10:36 +0000 | <a href="http://twitter.com/download/iphone" r... | Unique dog here. Very small. Lives in containe... | NaN | https://twitter.com/dog_rates/status/666362758... | 6 | 10 | None | None | ... | skunk | 0.002402 | False | hamster | 0.000461 | False | 778 | 574 | 6997189 | 134700 |
2043 | 666353288456101888 | 2015-11-16 20:32:58 +0000 | <a href="http://twitter.com/download/iphone" r... | Here we have a mixed Asiago from the GalƔpagos... | NaN | https://twitter.com/dog_rates/status/666353288... | 8 | 10 | None | None | ... | Siberian_husky | 0.147655 | True | Eskimo_dog | 0.093412 | True | 221 | 73 | 6997190 | 134700 |
2044 | 666345417576210432 | 2015-11-16 20:01:42 +0000 | <a href="http://twitter.com/download/iphone" r... | Look at this jokester thinking seat belt laws ... | NaN | https://twitter.com/dog_rates/status/666345417... | 10 | 10 | None | None | ... | Chesapeake_Bay_retriever | 0.054787 | True | Labrador_retriever | 0.014241 | True | 299 | 139 | 6997190 | 134700 |
2045 | 666337882303524864 | 2015-11-16 19:31:45 +0000 | <a href="http://twitter.com/download/iphone" r... | This is an extremely rare horned Parthenon. No... | NaN | https://twitter.com/dog_rates/status/666337882... | 9 | 10 | None | None | ... | Newfoundland | 0.278407 | True | groenendael | 0.102643 | True | 198 | 92 | 6997190 | 134700 |
2046 | 666293911632134144 | 2015-11-16 16:37:02 +0000 | <a href="http://twitter.com/download/iphone" r... | This is a funny dog. Weird toes. Won't come do... | NaN | https://twitter.com/dog_rates/status/666293911... | 3 | 10 | None | None | ... | otter | 0.015250 | False | great_grey_owl | 0.013207 | False | 509 | 357 | 6997190 | 134700 |
2047 | 666287406224695296 | 2015-11-16 16:11:11 +0000 | <a href="http://twitter.com/download/iphone" r... | This is an Albanian 3 1/2 legged Episcopalian... | NaN | https://twitter.com/dog_rates/status/666287406... | 1 | 10 | None | None | ... | toy_poodle | 0.063064 | True | miniature_poodle | 0.025581 | True | 149 | 66 | 6997190 | 134700 |
2048 | 666273097616637952 | 2015-11-16 15:14:19 +0000 | <a href="http://twitter.com/download/iphone" r... | Can take selfies 11/10 https://t.co/ws2AMaNwPW | NaN | https://twitter.com/dog_rates/status/666273097... | 11 | 10 | None | None | ... | toy_terrier | 0.111884 | True | basenji | 0.111152 | True | 176 | 76 | 6997190 | 134700 |
2049 | 666268910803644416 | 2015-11-16 14:57:41 +0000 | <a href="http://twitter.com/download/iphone" r... | Very concerned about fellow dog trapped in com... | NaN | https://twitter.com/dog_rates/status/666268910... | 10 | 10 | None | None | ... | desk | 0.085547 | False | bookcase | 0.079480 | False | 104 | 35 | 6997190 | 134700 |
2050 | 666104133288665088 | 2015-11-16 04:02:55 +0000 | <a href="http://twitter.com/download/iphone" r... | Not familiar with this breed. No tail (weird).... | NaN | https://twitter.com/dog_rates/status/666104133... | 1 | 10 | None | None | ... | cock | 0.033919 | False | partridge | 0.000052 | False | 14346 | 6634 | 6997190 | 134700 |
2051 | 666102155909144576 | 2015-11-16 03:55:04 +0000 | <a href="http://twitter.com/download/iphone" r... | Oh my. Here you are seeing an Adobe Setter giv... | NaN | https://twitter.com/dog_rates/status/666102155... | 11 | 10 | None | None | ... | Newfoundland | 0.149842 | True | borzoi | 0.133649 | True | 80 | 13 | 6997190 | 134700 |
2052 | 666099513787052032 | 2015-11-16 03:44:34 +0000 | <a href="http://twitter.com/download/iphone" r... | Can stand on stump for what seems like a while... | NaN | https://twitter.com/dog_rates/status/666099513... | 8 | 10 | None | None | ... | Shih-Tzu | 0.166192 | True | Dandie_Dinmont | 0.089688 | True | 156 | 68 | 6997190 | 134700 |
2053 | 666094000022159362 | 2015-11-16 03:22:39 +0000 | <a href="http://twitter.com/download/iphone" r... | This appears to be a Mongolian Presbyterian mi... | NaN | https://twitter.com/dog_rates/status/666094000... | 9 | 10 | None | None | ... | German_shepherd | 0.078260 | True | malinois | 0.075628 | True | 164 | 74 | 6997190 | 134700 |
2054 | 666082916733198337 | 2015-11-16 02:38:37 +0000 | <a href="http://twitter.com/download/iphone" r... | Here we have a well-established sunblockerspan... | NaN | https://twitter.com/dog_rates/status/666082916... | 6 | 10 | None | None | ... | bull_mastiff | 0.404722 | True | French_bulldog | 0.048960 | True | 119 | 45 | 6997190 | 134700 |
2055 | 666073100786774016 | 2015-11-16 01:59:36 +0000 | <a href="http://twitter.com/download/iphone" r... | Let's hope this flight isn't Malaysian (lol). ... | NaN | https://twitter.com/dog_rates/status/666073100... | 10 | 10 | None | None | ... | English_foxhound | 0.175382 | True | Ibizan_hound | 0.097471 | True | 322 | 164 | 6997190 | 134700 |
2056 | 666071193221509120 | 2015-11-16 01:52:02 +0000 | <a href="http://twitter.com/download/iphone" r... | Here we have a northern speckled Rhododendron.... | NaN | https://twitter.com/dog_rates/status/666071193... | 9 | 10 | None | None | ... | Yorkshire_terrier | 0.174201 | True | Pekinese | 0.109454 | True | 148 | 62 | 6997190 | 134700 |
2057 | 666063827256086533 | 2015-11-16 01:22:45 +0000 | <a href="http://twitter.com/download/iphone" r... | This is the happiest dog you will ever see. Ve... | NaN | https://twitter.com/dog_rates/status/666063827... | 10 | 10 | None | None | ... | Tibetan_mastiff | 0.093718 | True | Labrador_retriever | 0.072427 | True | 476 | 219 | 6997190 | 134700 |
2058 | 666058600524156928 | 2015-11-16 01:01:59 +0000 | <a href="http://twitter.com/download/iphone" r... | Here is the Rand Paul of retrievers folks! He'... | NaN | https://twitter.com/dog_rates/status/666058600... | 8 | 10 | None | None | ... | komondor | 0.192305 | True | soft-coated_wheaten_terrier | 0.082086 | True | 112 | 57 | 6997190 | 134700 |
2059 | 666057090499244032 | 2015-11-16 00:55:59 +0000 | <a href="http://twitter.com/download/iphone" r... | My oh my. This is a rare blond Canadian terrie... | NaN | https://twitter.com/dog_rates/status/666057090... | 9 | 10 | None | None | ... | shopping_basket | 0.014594 | False | golden_retriever | 0.007959 | True | 298 | 142 | 6997190 | 134700 |
2060 | 666055525042405380 | 2015-11-16 00:49:46 +0000 | <a href="http://twitter.com/download/iphone" r... | Here is a Siberian heavily armored polar bear ... | NaN | https://twitter.com/dog_rates/status/666055525... | 10 | 10 | None | None | ... | Tibetan_mastiff | 0.058279 | True | fur_coat | 0.054449 | False | 434 | 252 | 6997190 | 134700 |
2061 | 666051853826850816 | 2015-11-16 00:35:11 +0000 | <a href="http://twitter.com/download/iphone" r... | This is an odd dog. Hard on the outside but lo... | NaN | https://twitter.com/dog_rates/status/666051853... | 2 | 10 | None | None | ... | mud_turtle | 0.045885 | False | terrapin | 0.017885 | False | 1223 | 853 | 6997190 | 134700 |
2062 | 666050758794694657 | 2015-11-16 00:30:50 +0000 | <a href="http://twitter.com/download/iphone" r... | This is a truly beautiful English Wilson Staff... | NaN | https://twitter.com/dog_rates/status/666050758... | 10 | 10 | None | None | ... | English_springer | 0.263788 | True | Greater_Swiss_Mountain_dog | 0.016199 | True | 132 | 58 | 6997190 | 134700 |
2063 | 666049248165822465 | 2015-11-16 00:24:50 +0000 | <a href="http://twitter.com/download/iphone" r... | Here we have a 1949 1st generation vulpix. Enj... | NaN | https://twitter.com/dog_rates/status/666049248... | 5 | 10 | None | None | ... | Rottweiler | 0.243682 | True | Doberman | 0.154629 | True | 109 | 41 | 6997190 | 134700 |
2064 | 666044226329800704 | 2015-11-16 00:04:52 +0000 | <a href="http://twitter.com/download/iphone" r... | This is a purebred Piers Morgan. Loves to Netf... | NaN | https://twitter.com/dog_rates/status/666044226... | 6 | 10 | None | None | ... | redbone | 0.360687 | True | miniature_pinscher | 0.222752 | True | 299 | 141 | 6997190 | 134700 |
2065 | 666033412701032449 | 2015-11-15 23:21:54 +0000 | <a href="http://twitter.com/download/iphone" r... | Here is a very happy pup. Big fan of well-main... | NaN | https://twitter.com/dog_rates/status/666033412... | 9 | 10 | None | None | ... | malinois | 0.138584 | True | bloodhound | 0.116197 | True | 125 | 44 | 6997190 | 134700 |
2066 | 666029285002620928 | 2015-11-15 23:05:30 +0000 | <a href="http://twitter.com/download/iphone" r... | This is a western brown Mitsubishi terrier. Up... | NaN | https://twitter.com/dog_rates/status/666029285... | 7 | 10 | None | None | ... | miniature_pinscher | 0.074192 | True | Rhodesian_ridgeback | 0.072010 | True | 129 | 47 | 6997190 | 134700 |
2067 | 666020888022790149 | 2015-11-15 22:32:08 +0000 | <a href="http://twitter.com/download/iphone" r... | Here we have a Japanese Irish Setter. Lost eye... | NaN | https://twitter.com/dog_rates/status/666020888... | 8 | 10 | None | None | ... | collie | 0.156665 | True | Shetland_sheepdog | 0.061428 | True | 2560 | 517 | 6997190 | 134700 |
1993 rows × 28 columns
#code
# Cast retweeted_status_user_id to str (object dtype).
# NOTE(review): the column has no non-null values before this cast, so the
# cast only turns the missing values into strings; after the CSV round-trip
# at the end of the notebook the column reads back as float64 with 0 non-null.
df_master = df_master.astype({'retweeted_status_user_id': str})
#Test
# retweeted_status_user_id should now be reported with dtype object.
df_master.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 1993 entries, 0 to 2067 Data columns (total 28 columns): tweet_id 1993 non-null int64 timestamp 1993 non-null object source 1993 non-null object text 1993 non-null object retweeted_status_user_id 1993 non-null object expanded_urls 1993 non-null object rating_numerator 1993 non-null int64 rating_denominator 1993 non-null int64 name 1993 non-null object doggo 1993 non-null object floofer 1993 non-null object pupper 1993 non-null object puppo 1993 non-null object jpg_url 1993 non-null object img_num 1993 non-null int64 p1 1993 non-null object p1_conf 1993 non-null float64 p1_dog 1993 non-null bool p2 1993 non-null object p2_conf 1993 non-null float64 p2_dog 1993 non-null bool p3 1993 non-null object p3_conf 1993 non-null float64 p3_dog 1993 non-null bool favorites 1993 non-null int64 retweets 1993 non-null int64 user_followers 1993 non-null int64 user_favourites 1993 non-null int64 dtypes: bool(3), float64(3), int64(8), object(14) memory usage: 410.7+ KB
#Code
# Give the image-prediction columns more descriptive names:
#   pN -> categoryN, pN_conf -> catN_conf, pN_dog -> catN_dog   (N = 1, 2, 3)
prediction_renames = {}
for rank in ('1', '2', '3'):
    prediction_renames['p' + rank] = 'category' + rank
    prediction_renames['p' + rank + '_conf'] = 'cat' + rank + '_conf'
    prediction_renames['p' + rank + '_dog'] = 'cat' + rank + '_dog'
df_master = df_master.rename(columns=prediction_renames)
#Test
# The nine prediction columns should appear under their new names.
df_master.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 1993 entries, 0 to 2067 Data columns (total 28 columns): tweet_id 1993 non-null int64 timestamp 1993 non-null object source 1993 non-null object text 1993 non-null object retweeted_status_user_id 1993 non-null object expanded_urls 1993 non-null object rating_numerator 1993 non-null int64 rating_denominator 1993 non-null int64 name 1993 non-null object doggo 1993 non-null object floofer 1993 non-null object pupper 1993 non-null object puppo 1993 non-null object jpg_url 1993 non-null object img_num 1993 non-null int64 category1 1993 non-null object cat1_conf 1993 non-null float64 cat1_dog 1993 non-null bool category2 1993 non-null object cat2_conf 1993 non-null float64 cat2_dog 1993 non-null bool category3 1993 non-null object cat3_conf 1993 non-null float64 cat3_dog 1993 non-null bool favorites 1993 non-null int64 retweets 1993 non-null int64 user_followers 1993 non-null int64 user_favourites 1993 non-null int64 dtypes: bool(3), float64(3), int64(8), object(14) memory usage: 410.7+ KB
#code
# Keep a single rating column: drop the denominator and rename the numerator
# so the column name states the scale.
# `axis` is passed by keyword: pandas 2.0 made every DataFrame.drop argument
# except `labels` keyword-only, so the old drop(label, 1) form raises TypeError.
# NOTE(review): the visible cleaning step only raises denominators below 10;
# confirm no denominator above 10 remains before relying on "out of 10".
df_master = df_master.drop('rating_denominator', axis=1)
df_master = df_master.rename(columns={'rating_numerator': 'rating_out_of_10'})
#test
# rating_denominator should be gone and rating_out_of_10 present.
df_master.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 1993 entries, 0 to 2067 Data columns (total 27 columns): tweet_id 1993 non-null int64 timestamp 1993 non-null object source 1993 non-null object text 1993 non-null object retweeted_status_user_id 1993 non-null object expanded_urls 1993 non-null object rating_out_of_10 1993 non-null int64 name 1993 non-null object doggo 1993 non-null object floofer 1993 non-null object pupper 1993 non-null object puppo 1993 non-null object jpg_url 1993 non-null object img_num 1993 non-null int64 category1 1993 non-null object cat1_conf 1993 non-null float64 cat1_dog 1993 non-null bool category2 1993 non-null object cat2_conf 1993 non-null float64 cat2_dog 1993 non-null bool category3 1993 non-null object cat3_conf 1993 non-null float64 cat3_dog 1993 non-null bool favorites 1993 non-null int64 retweets 1993 non-null int64 user_followers 1993 non-null int64 user_favourites 1993 non-null int64 dtypes: bool(3), float64(3), int64(7), object(14) memory usage: 395.1+ KB
#code
# Collapse the four dog-stage indicator columns into one dog_stage column.
stage_columns = ['doggo', 'floofer', 'pupper', 'puppo']

# Replace the 'None' placeholder in each stage column with a single space so
# that it does not show up as the word "None" in the concatenated value.
# NOTE(review): the single-space placeholder leaves padding in dog_stage
# (a row with no stage becomes four spaces). It is kept as-is here because
# later cells may match on these exact strings; consider '' plus a strip.
for stage in stage_columns:
    df_master.loc[df_master[stage] == 'None', stage] = ' '

#merge these columns and create a column named dog_stage
df_master['dog_stage'] = (df_master['doggo'] + df_master['floofer']
                          + df_master['pupper'] + df_master['puppo'])

#delete redundant columns doggo, floofer, pupper, and puppo
# `axis` is passed by keyword: pandas 2.0 made every DataFrame.drop argument
# except `labels` keyword-only, so the old drop(label, 1) form raises TypeError.
df_master = df_master.drop(stage_columns, axis=1)
#test
#check dataframe info for above changes...
df_master.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 1993 entries, 0 to 2067 Data columns (total 24 columns): tweet_id 1993 non-null int64 timestamp 1993 non-null object source 1993 non-null object text 1993 non-null object retweeted_status_user_id 1993 non-null object expanded_urls 1993 non-null object rating_out_of_10 1993 non-null int64 name 1993 non-null object jpg_url 1993 non-null object img_num 1993 non-null int64 category1 1993 non-null object cat1_conf 1993 non-null float64 cat1_dog 1993 non-null bool category2 1993 non-null object cat2_conf 1993 non-null float64 cat2_dog 1993 non-null bool category3 1993 non-null object cat3_conf 1993 non-null float64 cat3_dog 1993 non-null bool favorites 1993 non-null int64 retweets 1993 non-null int64 user_followers 1993 non-null int64 user_favourites 1993 non-null int64 dog_stage 1993 non-null object dtypes: bool(3), float64(3), int64(7), object(11) memory usage: 348.4+ KB
# Test: show the column labels that remain in df_master
df_master.columns.tolist()
['tweet_id', 'timestamp', 'source', 'text', 'retweeted_status_user_id', 'expanded_urls', 'rating_out_of_10', 'name', 'jpg_url', 'img_num', 'category1', 'cat1_conf', 'cat1_dog', 'category2', 'cat2_conf', 'cat2_dog', 'category3', 'cat3_conf', 'cat3_dog', 'favorites', 'retweets', 'user_followers', 'user_favourites', 'dog_stage']
# Write the cleaned DataFrame to CSV without the index column, then load the
# file back and inspect the dtypes pandas infers from it.
df_master.to_csv(
    'twitter_archive_master.csv',
    encoding='utf-8',
    index=False,
)
df_master = pd.read_csv(filepath_or_buffer='twitter_archive_master.csv')
df_master.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 1993 entries, 0 to 1992 Data columns (total 24 columns): tweet_id 1993 non-null int64 timestamp 1993 non-null object source 1993 non-null object text 1993 non-null object retweeted_status_user_id 0 non-null float64 expanded_urls 1993 non-null object rating_out_of_10 1993 non-null int64 name 1993 non-null object jpg_url 1993 non-null object img_num 1993 non-null int64 category1 1993 non-null object cat1_conf 1993 non-null float64 cat1_dog 1993 non-null bool category2 1993 non-null object cat2_conf 1993 non-null float64 cat2_dog 1993 non-null bool category3 1993 non-null object cat3_conf 1993 non-null float64 cat3_dog 1993 non-null bool favorites 1993 non-null int64 retweets 1993 non-null int64 user_followers 1993 non-null int64 user_favourites 1993 non-null int64 dog_stage 1993 non-null object dtypes: bool(3), float64(4), int64(7), object(10) memory usage: 332.9+ KB
Data cleaning section ends here.....
Visualization section begins here....
# Load the cleaned dataset from disk, summarise it, then display the frame
df_master = pd.read_csv(filepath_or_buffer='twitter_archive_master.csv')
df_master.info()
df_master
<class 'pandas.core.frame.DataFrame'> RangeIndex: 1993 entries, 0 to 1992 Data columns (total 24 columns): tweet_id 1993 non-null int64 timestamp 1993 non-null object source 1993 non-null object text 1993 non-null object retweeted_status_user_id 0 non-null float64 expanded_urls 1993 non-null object rating_out_of_10 1993 non-null int64 name 1993 non-null object jpg_url 1993 non-null object img_num 1993 non-null int64 category1 1993 non-null object cat1_conf 1993 non-null float64 cat1_dog 1993 non-null bool category2 1993 non-null object cat2_conf 1993 non-null float64 cat2_dog 1993 non-null bool category3 1993 non-null object cat3_conf 1993 non-null float64 cat3_dog 1993 non-null bool favorites 1993 non-null int64 retweets 1993 non-null int64 user_followers 1993 non-null int64 user_favourites 1993 non-null int64 dog_stage 1993 non-null object dtypes: bool(3), float64(4), int64(7), object(10) memory usage: 332.9+ KB
tweet_id | timestamp | source | text | retweeted_status_user_id | expanded_urls | rating_out_of_10 | name | jpg_url | img_num | ... | cat2_conf | cat2_dog | category3 | cat3_conf | cat3_dog | favorites | retweets | user_followers | user_favourites | dog_stage | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 892420643555336193 | 2017-08-01 16:23:56 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Phineas. He's a mystical boy. Only eve... | NaN | https://twitter.com/dog_rates/status/892420643... | 13 | Phineas | https://pbs.twimg.com/media/DGKD1-bXoAAIAUK.jpg | 1 | ... | 0.085851 | False | banana | 0.076110 | False | 38680 | 8559 | 6997101 | 134700 | |
1 | 892177421306343426 | 2017-08-01 00:17:27 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Tilly. She's just checking pup on you.... | NaN | https://twitter.com/dog_rates/status/892177421... | 13 | Tilly | https://pbs.twimg.com/media/DGGmoV4XsAAUL6n.jpg | 1 | ... | 0.090647 | True | papillon | 0.068957 | True | 33156 | 6291 | 6997101 | 134700 | |
2 | 891815181378084864 | 2017-07-31 00:18:03 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Archie. He is a rare Norwegian Pouncin... | NaN | https://twitter.com/dog_rates/status/891815181... | 12 | Archie | https://pbs.twimg.com/media/DGBdLU1WsAANxJ9.jpg | 1 | ... | 0.078253 | True | kelpie | 0.031379 | True | 24958 | 4170 | 6997101 | 134700 | |
3 | 891689557279858688 | 2017-07-30 15:58:51 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Darla. She commenced a snooze mid meal... | NaN | https://twitter.com/dog_rates/status/891689557... | 13 | Darla | https://pbs.twimg.com/media/DF_q7IAWsAEuuN8.jpg | 1 | ... | 0.168086 | True | spatula | 0.040836 | False | 42064 | 8687 | 6997101 | 134700 | |
4 | 891327558926688256 | 2017-07-29 16:00:24 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Franklin. He would like you to stop ca... | NaN | https://twitter.com/dog_rates/status/891327558... | 12 | Franklin | https://pbs.twimg.com/media/DF6hr6BUMAAzZgT.jpg | 2 | ... | 0.225770 | True | German_short-haired_pointer | 0.175219 | True | 40212 | 9447 | 6997101 | 134700 | |
5 | 891087950875897856 | 2017-07-29 00:08:17 +0000 | <a href="http://twitter.com/download/iphone" r... | Here we have a majestic great white breaching ... | NaN | https://twitter.com/dog_rates/status/891087950... | 13 | None | https://pbs.twimg.com/media/DF3HwyEWsAABqE6.jpg | 1 | ... | 0.116317 | True | Indian_elephant | 0.076902 | False | 20163 | 3127 | 6997101 | 134700 | |
6 | 890971913173991426 | 2017-07-28 16:27:12 +0000 | <a href="http://twitter.com/download/iphone" r... | Meet Jax. He enjoys ice cream so much he gets ... | NaN | https://gofundme.com/ydvmve-surgery-for-jax,ht... | 13 | Jax | https://pbs.twimg.com/media/DF1eOmZXUAALUcq.jpg | 1 | ... | 0.199287 | True | ice_lolly | 0.193548 | False | 11816 | 2082 | 6997101 | 134700 | |
7 | 890729181411237888 | 2017-07-28 00:22:40 +0000 | <a href="http://twitter.com/download/iphone" r... | When you watch your owner call another dog a g... | NaN | https://twitter.com/dog_rates/status/890729181... | 13 | None | https://pbs.twimg.com/media/DFyBahAVwAAhUTd.jpg | 2 | ... | 0.178406 | True | Pembroke | 0.076507 | True | 65338 | 18971 | 6997101 | 134700 | |
8 | 890609185150312448 | 2017-07-27 16:25:51 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Zoey. She doesn't want to be one of th... | NaN | https://twitter.com/dog_rates/status/890609185... | 13 | Zoey | https://pbs.twimg.com/media/DFwUU__XcAEpyXI.jpg | 1 | ... | 0.193054 | True | Chesapeake_Bay_retriever | 0.118184 | True | 27713 | 4279 | 6997101 | 134700 | |
9 | 890240255349198849 | 2017-07-26 15:59:51 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Cassie. She is a college pup. Studying... | NaN | https://twitter.com/dog_rates/status/890240255... | 14 | Cassie | https://pbs.twimg.com/media/DFrEyVuW0AAO3t9.jpg | 1 | ... | 0.451038 | True | Chihuahua | 0.029248 | True | 31857 | 7448 | 6997101 | 134700 | doggo |
10 | 890006608113172480 | 2017-07-26 00:31:25 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Koda. He is a South Australian decksha... | NaN | https://twitter.com/dog_rates/status/890006608... | 13 | Koda | https://pbs.twimg.com/media/DFnwSY4WAAAMliS.jpg | 1 | ... | 0.013884 | True | chow | 0.008167 | True | 30583 | 7363 | 6997101 | 134700 | |
11 | 889880896479866881 | 2017-07-25 16:11:53 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Bruno. He is a service shark. Only get... | NaN | https://twitter.com/dog_rates/status/889880896... | 13 | Bruno | https://pbs.twimg.com/media/DFl99B1WsAITKsg.jpg | 1 | ... | 0.151317 | True | muzzle | 0.082981 | False | 27719 | 4989 | 6997101 | 134700 | |
12 | 889665388333682689 | 2017-07-25 01:55:32 +0000 | <a href="http://twitter.com/download/iphone" r... | Here's a puppo that seems to be on the fence a... | NaN | https://twitter.com/dog_rates/status/889665388... | 13 | None | https://pbs.twimg.com/media/DFi579UWsAAatzw.jpg | 1 | ... | 0.027356 | True | basenji | 0.004633 | True | 48003 | 10101 | 6997101 | 134700 | puppo |
13 | 889638837579907072 | 2017-07-25 00:10:02 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Ted. He does his best. Sometimes that'... | NaN | https://twitter.com/dog_rates/status/889638837... | 12 | Ted | https://pbs.twimg.com/media/DFihzFfXsAYGDPR.jpg | 1 | ... | 0.002129 | True | Staffordshire_bullterrier | 0.001498 | True | 27103 | 4564 | 6997101 | 134700 | |
14 | 889531135344209921 | 2017-07-24 17:02:04 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Stuart. He's sporting his favorite fan... | NaN | https://twitter.com/dog_rates/status/889531135... | 13 | Stuart | https://pbs.twimg.com/media/DFg_2PVW0AEHN3p.jpg | 1 | ... | 0.013834 | True | redbone | 0.007958 | True | 15052 | 2244 | 6997101 | 134700 | puppo |
15 | 889278841981685760 | 2017-07-24 00:19:32 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Oliver. You're witnessing one of his m... | NaN | https://twitter.com/dog_rates/status/889278841... | 13 | Oliver | https://pbs.twimg.com/ext_tw_video_thumb/88927... | 1 | ... | 0.194742 | True | Saluki | 0.027351 | True | 25236 | 5446 | 6997101 | 134700 | |
16 | 888917238123831296 | 2017-07-23 00:22:39 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Jim. He found a fren. Taught him how t... | NaN | https://twitter.com/dog_rates/status/888917238... | 12 | Jim | https://pbs.twimg.com/media/DFYRgsOUQAARGhO.jpg | 1 | ... | 0.120184 | True | Labrador_retriever | 0.105506 | True | 28996 | 4517 | 6997101 | 134700 | |
17 | 888804989199671297 | 2017-07-22 16:56:37 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Zeke. He has a new stick. Very proud o... | NaN | https://twitter.com/dog_rates/status/888804989... | 13 | Zeke | https://pbs.twimg.com/media/DFWra-3VYAA2piG.jpg | 1 | ... | 0.184172 | True | English_setter | 0.073482 | True | 25527 | 4363 | 6997101 | 134700 | |
18 | 888554962724278272 | 2017-07-22 00:23:06 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Ralphus. He's powering up. Attempting ... | NaN | https://twitter.com/dog_rates/status/888554962... | 13 | Ralphus | https://pbs.twimg.com/media/DFTH_O-UQAACu20.jpg | 3 | ... | 0.166511 | True | malamute | 0.111411 | True | 19850 | 3596 | 6997101 | 134700 | |
19 | 888078434458587136 | 2017-07-20 16:49:33 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Gerald. He was just told he didn't get... | NaN | https://twitter.com/dog_rates/status/888078434... | 12 | Gerald | https://pbs.twimg.com/media/DFMWn56WsAAkA7B.jpg | 1 | ... | 0.000932 | True | bull_mastiff | 0.000903 | True | 21702 | 3511 | 6997101 | 134700 | |
20 | 887705289381826560 | 2017-07-19 16:06:48 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Jeffrey. He has a monopoly on the pool... | NaN | https://twitter.com/dog_rates/status/887705289... | 13 | Jeffrey | https://pbs.twimg.com/media/DFHDQBbXgAEqY7t.jpg | 1 | ... | 0.087582 | True | Weimaraner | 0.026236 | True | 30108 | 5414 | 6997101 | 134700 | |
21 | 887517139158093824 | 2017-07-19 03:39:09 +0000 | <a href="http://twitter.com/download/iphone" r... | I've yet to rate a Venezuelan Hover Wiener. Th... | NaN | https://twitter.com/dog_rates/status/887517139... | 14 | None | https://pbs.twimg.com/ext_tw_video_thumb/88751... | 1 | ... | 0.029175 | False | shopping_cart | 0.026321 | False | 46119 | 11713 | 6997101 | 134700 | |
22 | 887473957103951883 | 2017-07-19 00:47:34 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Canela. She attempted some fancy porch... | NaN | https://twitter.com/dog_rates/status/887473957... | 13 | Canela | https://pbs.twimg.com/media/DFDw2tyUQAAAFke.jpg | 2 | ... | 0.054950 | True | beagle | 0.038915 | True | 68935 | 18305 | 6997101 | 134700 | |
23 | 887343217045368832 | 2017-07-18 16:08:03 +0000 | <a href="http://twitter.com/download/iphone" r... | You may not have known you needed to see this ... | NaN | https://twitter.com/dog_rates/status/887343217... | 13 | None | https://pbs.twimg.com/ext_tw_video_thumb/88734... | 1 | ... | 0.275645 | False | Weimaraner | 0.134203 | True | 33582 | 10446 | 6997101 | 134700 | |
24 | 887101392804085760 | 2017-07-18 00:07:08 +0000 | <a href="http://twitter.com/download/iphone" r... | This... is a Jubilant Antarctic House Bear. We... | NaN | https://twitter.com/dog_rates/status/887101392... | 12 | None | https://pbs.twimg.com/media/DE-eAq6UwAA-jaE.jpg | 1 | ... | 0.035029 | True | Staffordshire_bullterrier | 0.029705 | True | 30451 | 5984 | 6997101 | 134700 | |
25 | 886983233522544640 | 2017-07-17 16:17:36 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Maya. She's very shy. Rarely leaves he... | NaN | https://twitter.com/dog_rates/status/886983233... | 13 | Maya | https://pbs.twimg.com/media/DE8yicJW0AAAvBJ.jpg | 2 | ... | 0.143528 | True | can_opener | 0.032253 | False | 35058 | 7806 | 6997101 | 134700 | |
26 | 886736880519319552 | 2017-07-16 23:58:41 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Mingus. He's a wonderful father to his... | NaN | https://www.gofundme.com/mingusneedsus,https:/... | 13 | Mingus | https://pbs.twimg.com/media/DE5Se8FXcAAJFx4.jpg | 1 | ... | 0.186136 | True | Dandie_Dinmont | 0.086346 | True | 12037 | 3311 | 6997101 | 134700 | |
27 | 886680336477933568 | 2017-07-16 20:14:00 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Derek. He's late for a dog meeting. 13... | NaN | https://twitter.com/dog_rates/status/886680336... | 13 | Derek | https://pbs.twimg.com/media/DE4fEDzWAAAyHMM.jpg | 1 | ... | 0.139952 | False | car_wheel | 0.044173 | False | 22362 | 4486 | 6997101 | 134700 | |
28 | 886366144734445568 | 2017-07-15 23:25:31 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Roscoe. Another pupper fallen victim t... | NaN | https://twitter.com/dog_rates/status/886366144... | 12 | Roscoe | https://pbs.twimg.com/media/DE0BTnQUwAApKEH.jpg | 1 | ... | 0.000361 | True | Boston_bull | 0.000076 | True | 21142 | 3208 | 6997102 | 134700 | pupper |
29 | 886258384151887873 | 2017-07-15 16:17:19 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Waffles. His doggles are pupside down.... | NaN | https://twitter.com/dog_rates/status/886258384... | 13 | Waffles | https://pbs.twimg.com/media/DEyfTG4UMAE4aE9.jpg | 1 | ... | 0.025286 | False | Siamese_cat | 0.002849 | False | 27905 | 6316 | 6997102 | 134700 | |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
1963 | 666411507551481857 | 2015-11-17 00:24:19 +0000 | <a href="http://twitter.com/download/iphone" r... | This is quite the dog. Gets really excited whe... | NaN | https://twitter.com/dog_rates/status/666411507... | 2 | None | https://pbs.twimg.com/media/CT-RugiWIAELEaq.jpg | 1 | ... | 0.271485 | False | gar | 0.189945 | False | 447 | 328 | 6997189 | 134700 | |
1964 | 666407126856765440 | 2015-11-17 00:06:54 +0000 | <a href="http://twitter.com/download/iphone" r... | This is a southern Vesuvius bumblegruff. Can d... | NaN | https://twitter.com/dog_rates/status/666407126... | 7 | None | https://pbs.twimg.com/media/CT-NvwmW4AAugGZ.jpg | 1 | ... | 0.244220 | True | flat-coated_retriever | 0.173810 | True | 110 | 41 | 6997189 | 134700 | |
1965 | 666396247373291520 | 2015-11-16 23:23:41 +0000 | <a href="http://twitter.com/download/iphone" r... | Oh goodness. A super rare northeast Qdoba kang... | NaN | https://twitter.com/dog_rates/status/666396247... | 9 | None | https://pbs.twimg.com/media/CT-D2ZHWIAA3gK1.jpg | 1 | ... | 0.009397 | True | papillon | 0.004577 | True | 167 | 86 | 6997189 | 134700 | |
1966 | 666373753744588802 | 2015-11-16 21:54:18 +0000 | <a href="http://twitter.com/download/iphone" r... | Those are sunglasses and a jean jacket. 11/10 ... | NaN | https://twitter.com/dog_rates/status/666373753... | 11 | None | https://pbs.twimg.com/media/CT9vZEYWUAAlZ05.jpg | 1 | ... | 0.259551 | True | briard | 0.206803 | True | 190 | 93 | 6997189 | 134700 | |
1967 | 666362758909284353 | 2015-11-16 21:10:36 +0000 | <a href="http://twitter.com/download/iphone" r... | Unique dog here. Very small. Lives in containe... | NaN | https://twitter.com/dog_rates/status/666362758... | 6 | None | https://pbs.twimg.com/media/CT9lXGsUcAAyUFt.jpg | 1 | ... | 0.002402 | False | hamster | 0.000461 | False | 778 | 574 | 6997189 | 134700 | |
1968 | 666353288456101888 | 2015-11-16 20:32:58 +0000 | <a href="http://twitter.com/download/iphone" r... | Here we have a mixed Asiago from the Galápagos... | NaN | https://twitter.com/dog_rates/status/666353288... | 8 | None | https://pbs.twimg.com/media/CT9cx0tUEAAhNN_.jpg | 1 | ... | 0.147655 | True | Eskimo_dog | 0.093412 | True | 221 | 73 | 6997190 | 134700 | |
1969 | 666345417576210432 | 2015-11-16 20:01:42 +0000 | <a href="http://twitter.com/download/iphone" r... | Look at this jokester thinking seat belt laws ... | NaN | https://twitter.com/dog_rates/status/666345417... | 10 | None | https://pbs.twimg.com/media/CT9Vn7PWoAA_ZCM.jpg | 1 | ... | 0.054787 | True | Labrador_retriever | 0.014241 | True | 299 | 139 | 6997190 | 134700 | |
1970 | 666337882303524864 | 2015-11-16 19:31:45 +0000 | <a href="http://twitter.com/download/iphone" r... | This is an extremely rare horned Parthenon. No... | NaN | https://twitter.com/dog_rates/status/666337882... | 9 | None | https://pbs.twimg.com/media/CT9OwFIWEAMuRje.jpg | 1 | ... | 0.278407 | True | groenendael | 0.102643 | True | 198 | 92 | 6997190 | 134700 | |
1971 | 666293911632134144 | 2015-11-16 16:37:02 +0000 | <a href="http://twitter.com/download/iphone" r... | This is a funny dog. Weird toes. Won't come do... | NaN | https://twitter.com/dog_rates/status/666293911... | 3 | None | https://pbs.twimg.com/media/CT8mx7KW4AEQu8N.jpg | 1 | ... | 0.015250 | False | great_grey_owl | 0.013207 | False | 509 | 357 | 6997190 | 134700 | |
1972 | 666287406224695296 | 2015-11-16 16:11:11 +0000 | <a href="http://twitter.com/download/iphone" r... | This is an Albanian 3 1/2 legged Episcopalian... | NaN | https://twitter.com/dog_rates/status/666287406... | 1 | None | https://pbs.twimg.com/media/CT8g3BpUEAAuFjg.jpg | 1 | ... | 0.063064 | True | miniature_poodle | 0.025581 | True | 149 | 66 | 6997190 | 134700 | |
1973 | 666273097616637952 | 2015-11-16 15:14:19 +0000 | <a href="http://twitter.com/download/iphone" r... | Can take selfies 11/10 https://t.co/ws2AMaNwPW | NaN | https://twitter.com/dog_rates/status/666273097... | 11 | None | https://pbs.twimg.com/media/CT8T1mtUwAA3aqm.jpg | 1 | ... | 0.111884 | True | basenji | 0.111152 | True | 176 | 76 | 6997190 | 134700 | |
1974 | 666268910803644416 | 2015-11-16 14:57:41 +0000 | <a href="http://twitter.com/download/iphone" r... | Very concerned about fellow dog trapped in com... | NaN | https://twitter.com/dog_rates/status/666268910... | 10 | None | https://pbs.twimg.com/media/CT8QCd1WEAADXws.jpg | 1 | ... | 0.085547 | False | bookcase | 0.079480 | False | 104 | 35 | 6997190 | 134700 | |
1975 | 666104133288665088 | 2015-11-16 04:02:55 +0000 | <a href="http://twitter.com/download/iphone" r... | Not familiar with this breed. No tail (weird).... | NaN | https://twitter.com/dog_rates/status/666104133... | 1 | None | https://pbs.twimg.com/media/CT56LSZWoAAlJj2.jpg | 1 | ... | 0.033919 | False | partridge | 0.000052 | False | 14346 | 6634 | 6997190 | 134700 | |
1976 | 666102155909144576 | 2015-11-16 03:55:04 +0000 | <a href="http://twitter.com/download/iphone" r... | Oh my. Here you are seeing an Adobe Setter giv... | NaN | https://twitter.com/dog_rates/status/666102155... | 11 | None | https://pbs.twimg.com/media/CT54YGiWUAEZnoK.jpg | 1 | ... | 0.149842 | True | borzoi | 0.133649 | True | 80 | 13 | 6997190 | 134700 | |
1977 | 666099513787052032 | 2015-11-16 03:44:34 +0000 | <a href="http://twitter.com/download/iphone" r... | Can stand on stump for what seems like a while... | NaN | https://twitter.com/dog_rates/status/666099513... | 8 | None | https://pbs.twimg.com/media/CT51-JJUEAA6hV8.jpg | 1 | ... | 0.166192 | True | Dandie_Dinmont | 0.089688 | True | 156 | 68 | 6997190 | 134700 | |
1978 | 666094000022159362 | 2015-11-16 03:22:39 +0000 | <a href="http://twitter.com/download/iphone" r... | This appears to be a Mongolian Presbyterian mi... | NaN | https://twitter.com/dog_rates/status/666094000... | 9 | None | https://pbs.twimg.com/media/CT5w9gUW4AAsBNN.jpg | 1 | ... | 0.078260 | True | malinois | 0.075628 | True | 164 | 74 | 6997190 | 134700 | |
1979 | 666082916733198337 | 2015-11-16 02:38:37 +0000 | <a href="http://twitter.com/download/iphone" r... | Here we have a well-established sunblockerspan... | NaN | https://twitter.com/dog_rates/status/666082916... | 6 | None | https://pbs.twimg.com/media/CT5m4VGWEAAtKc8.jpg | 1 | ... | 0.404722 | True | French_bulldog | 0.048960 | True | 119 | 45 | 6997190 | 134700 | |
1980 | 666073100786774016 | 2015-11-16 01:59:36 +0000 | <a href="http://twitter.com/download/iphone" r... | Let's hope this flight isn't Malaysian (lol). ... | NaN | https://twitter.com/dog_rates/status/666073100... | 10 | None | https://pbs.twimg.com/media/CT5d9DZXAAALcwe.jpg | 1 | ... | 0.175382 | True | Ibizan_hound | 0.097471 | True | 322 | 164 | 6997190 | 134700 | |
1981 | 666071193221509120 | 2015-11-16 01:52:02 +0000 | <a href="http://twitter.com/download/iphone" r... | Here we have a northern speckled Rhododendron.... | NaN | https://twitter.com/dog_rates/status/666071193... | 9 | None | https://pbs.twimg.com/media/CT5cN_3WEAAlOoZ.jpg | 1 | ... | 0.174201 | True | Pekinese | 0.109454 | True | 148 | 62 | 6997190 | 134700 | |
1982 | 666063827256086533 | 2015-11-16 01:22:45 +0000 | <a href="http://twitter.com/download/iphone" r... | This is the happiest dog you will ever see. Ve... | NaN | https://twitter.com/dog_rates/status/666063827... | 10 | None | https://pbs.twimg.com/media/CT5Vg_wXIAAXfnj.jpg | 1 | ... | 0.093718 | True | Labrador_retriever | 0.072427 | True | 476 | 219 | 6997190 | 134700 | |
1983 | 666058600524156928 | 2015-11-16 01:01:59 +0000 | <a href="http://twitter.com/download/iphone" r... | Here is the Rand Paul of retrievers folks! He'... | NaN | https://twitter.com/dog_rates/status/666058600... | 8 | None | https://pbs.twimg.com/media/CT5Qw94XAAA_2dP.jpg | 1 | ... | 0.192305 | True | soft-coated_wheaten_terrier | 0.082086 | True | 112 | 57 | 6997190 | 134700 | |
1984 | 666057090499244032 | 2015-11-16 00:55:59 +0000 | <a href="http://twitter.com/download/iphone" r... | My oh my. This is a rare blond Canadian terrie... | NaN | https://twitter.com/dog_rates/status/666057090... | 9 | None | https://pbs.twimg.com/media/CT5PY90WoAAQGLo.jpg | 1 | ... | 0.014594 | False | golden_retriever | 0.007959 | True | 298 | 142 | 6997190 | 134700 | |
1985 | 666055525042405380 | 2015-11-16 00:49:46 +0000 | <a href="http://twitter.com/download/iphone" r... | Here is a Siberian heavily armored polar bear ... | NaN | https://twitter.com/dog_rates/status/666055525... | 10 | None | https://pbs.twimg.com/media/CT5N9tpXIAAifs1.jpg | 1 | ... | 0.058279 | True | fur_coat | 0.054449 | False | 434 | 252 | 6997190 | 134700 | |
1986 | 666051853826850816 | 2015-11-16 00:35:11 +0000 | <a href="http://twitter.com/download/iphone" r... | This is an odd dog. Hard on the outside but lo... | NaN | https://twitter.com/dog_rates/status/666051853... | 2 | None | https://pbs.twimg.com/media/CT5KoJ1WoAAJash.jpg | 1 | ... | 0.045885 | False | terrapin | 0.017885 | False | 1223 | 853 | 6997190 | 134700 | |
1987 | 666050758794694657 | 2015-11-16 00:30:50 +0000 | <a href="http://twitter.com/download/iphone" r... | This is a truly beautiful English Wilson Staff... | NaN | https://twitter.com/dog_rates/status/666050758... | 10 | None | https://pbs.twimg.com/media/CT5Jof1WUAEuVxN.jpg | 1 | ... | 0.263788 | True | Greater_Swiss_Mountain_dog | 0.016199 | True | 132 | 58 | 6997190 | 134700 | |
1988 | 666049248165822465 | 2015-11-16 00:24:50 +0000 | <a href="http://twitter.com/download/iphone" r... | Here we have a 1949 1st generation vulpix. Enj... | NaN | https://twitter.com/dog_rates/status/666049248... | 5 | None | https://pbs.twimg.com/media/CT5IQmsXIAAKY4A.jpg | 1 | ... | 0.243682 | True | Doberman | 0.154629 | True | 109 | 41 | 6997190 | 134700 | |
1989 | 666044226329800704 | 2015-11-16 00:04:52 +0000 | <a href="http://twitter.com/download/iphone" r... | This is a purebred Piers Morgan. Loves to Netf... | NaN | https://twitter.com/dog_rates/status/666044226... | 6 | None | https://pbs.twimg.com/media/CT5Dr8HUEAA-lEu.jpg | 1 | ... | 0.360687 | True | miniature_pinscher | 0.222752 | True | 299 | 141 | 6997190 | 134700 | |
1990 | 666033412701032449 | 2015-11-15 23:21:54 +0000 | <a href="http://twitter.com/download/iphone" r... | Here is a very happy pup. Big fan of well-main... | NaN | https://twitter.com/dog_rates/status/666033412... | 9 | None | https://pbs.twimg.com/media/CT4521TWwAEvMyu.jpg | 1 | ... | 0.138584 | True | bloodhound | 0.116197 | True | 125 | 44 | 6997190 | 134700 | |
1991 | 666029285002620928 | 2015-11-15 23:05:30 +0000 | <a href="http://twitter.com/download/iphone" r... | This is a western brown Mitsubishi terrier. Up... | NaN | https://twitter.com/dog_rates/status/666029285... | 7 | None | https://pbs.twimg.com/media/CT42GRgUYAA5iDo.jpg | 1 | ... | 0.074192 | True | Rhodesian_ridgeback | 0.072010 | True | 129 | 47 | 6997190 | 134700 | |
1992 | 666020888022790149 | 2015-11-15 22:32:08 +0000 | <a href="http://twitter.com/download/iphone" r... | Here we have a Japanese Irish Setter. Lost eye... | NaN | https://twitter.com/dog_rates/status/666020888... | 8 | None | https://pbs.twimg.com/media/CT4udn0WwAA0aMy.jpg | 1 | ... | 0.156665 | True | Shetland_sheepdog | 0.061428 | True | 2560 | 517 | 6997190 | 134700 |
1993 rows × 24 columns
# Scatter plot with tweet_id on the x axis and the retweet count on the y axis.
df_master.plot.scatter(x='tweet_id', y='retweets', color='red', alpha=0.5)
plt.xlabel('tweet_id')
plt.ylabel('retweets')
plt.title('Tweet Id vs Retweet Scatter plot')
Text(0.5,1,'Tweet Id vs Retweet Scatter plot')
# Scatter plot with tweet_id on the x axis and user_favourites on the y axis.
df_master.plot.scatter(x='tweet_id', y='user_favourites', color='red', alpha=1)
plt.xlabel('tweet_id')
plt.ylabel('user_favourites')
plt.title('Tweet and user favorites Scatter plot')
Text(0.5,1,'Tweet and user favorites Scatter plot')
# Bar chart of the five tweets with the highest favorite counts.
# .head(5) replaces the private DataFrame._slice helper, which is not part of
# the public pandas API and may change between releases.
top_favorites = df_master.sort_values('favorites', ascending=False).head(5)
top_favorites.plot(x='tweet_id', y='favorites', kind='bar')
<matplotlib.axes._subplots.AxesSubplot at 0x7f4fb912def0>
# Bar chart of the five most retweeted tweets.
# .head(5) replaces the private DataFrame._slice helper, which is not part of
# the public pandas API and may change between releases.
top_retweets = df_master.sort_values('retweets', ascending=False).head(5)
top_retweets.plot(x='tweet_id', y='retweets', kind='bar')
<matplotlib.axes._subplots.AxesSubplot at 0x7f4fb9137b38>
# Show the five most frequent category2 labels (treated here as dog breeds)
df_master['category2'].value_counts().iloc[:5]
Labrador_retriever 96 golden_retriever 82 Cardigan 72 Chihuahua 43 Chesapeake_Bay_retriever 40 Name: category2, dtype: int64
#take top 5 breeds and name all other breeds as 'Others' and then draw a pie chart on breed breakup
df1 = df_master.copy()
# Derive the top five from the data rather than hard-coding breed names: the
# hard-coded list named 'Pomeranian' although value_counts() ranks
# 'Chesapeake_Bay_retriever' fifth, so the chart did not show the real top 5.
top_breeds = df1['category2'].value_counts().head(5).index
# Every row whose category2 is outside the top five (missing values included)
# is relabelled 'Others'.
df1.loc[~df1['category2'].isin(top_breeds), 'category2'] = 'Others'
df1
# Pie chart of how often each category2 label occurs in df1,
# with missing values excluded before counting.
breed_counts = df1['category2'].dropna().value_counts()
breed_counts.plot.pie(autopct='%1.1f%%')
plt.title('Dog breed distribution')
Text(0.5,1,'Dog breed distribution')
# demonstration of using seaborn: scatter plot of retweets against tweet_id
# (fit_reg=False suppresses the regression line).
# x and y are passed by keyword; seaborn >= 0.12 no longer accepts them
# positionally.
sns.lmplot(x='tweet_id', y='retweets', data=df_master, fit_reg=False)
<seaborn.axisgrid.FacetGrid at 0x7f4fbb0eb668>
Visualization section ends here....
- Get link
- X
- Other Apps
- Get link
- X
- Other Apps
Comments
Post a Comment