import requests
import json
import pandas as pd
import seaborn as sns

def get_api(url):
    """Fetch one quote from a quote API endpoint.

    Sends a GET request to ``url``, parses the JSON body and returns the
    ``'quote'`` field of the first element of the returned list.

    Args:
        url: Endpoint to query.

    Returns:
        The quote text taken from ``response[0]['quote']``.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
            (Previously a non-200 answer surfaced as an UnboundLocalError
            because the parsed body was never assigned.)
        requests.Timeout: If the server does not answer within 30 seconds.
    """
    api_return = requests.get(url, timeout=30)
    # Fail with a meaningful HTTP error instead of falling through to an
    # unbound local variable.
    api_return.raise_for_status()
    return api_return.json()[0]['quote']


url = 'https://thesimpsonsquoteapi.glitch.me/quotes'
api_quote = get_api(url)
print(api_quote)

Can't we have one meeting that doesn't end with us digging up a corpse?


from tqdm.notebook import tqdm_notebook
# Sample the quote endpoint 2000 times and track, after every call, how many
# distinct quotes have been seen so far. The plotted curve flattens once most
# quotes the endpoint serves have been observed.
df = pd.DataFrame(index=range(0, 2000), columns=['uniq_count'])
quote_set = set()
for row in tqdm_notebook(df.index):
    # Fetch exactly once per iteration: calling the API separately for the
    # membership test and for the insert would compare one random quote and
    # store a different one.
    quote_set.add(get_api(url))
    # The size of the set is the running number of distinct quotes; a set
    # ignores repeats, so no separate counter is needed.
    df.at[row, 'uniq_count'] = len(quote_set)
df.plot()
# can you think of a faster way to do this? maybe a different api call?

<AxesSubplot:>


# Show every distinct quote collected by the sampling loop.
print(quote_set)

{"I'm sleeping in the bath tub.", "That's where I saw the leprechaun...He told me to burn things.", "I hope I didn't brain my damage.", 'Shoplifting is a victimless crime, like punching someone in the dark.', "All I'm gonna use this bed for is sleeping, eating and maybe building a little fort.", 'For once maybe someone will call me "sir" without adding, "You\'re making a scene."', "Me fail English? That's unpossible.", 'Last night\'s "Itchy & Scratchy" was, without a doubt, the worst episode ever. Rest assured that I was on the Internet within minutes, registering my disgust throughout the world.', "Oh boy, sleep! That's where I'm a viking!", 'Oh Yeah!', "When I catch you, I'm gonna pull out your eyes and stick 'em down your pants so you can watch me kick the crap outta you, okay? Next I'm gonna use your tongue to paint my boat!", 'Why are you pleople avoiding me? Does my withered face remind you of the grim specter of death?', "Hello, Simpson. I'm riding the bus today becuase Mother hid my car keys to punish me for talking to a woman on the phone. She was right to do it.", 'Eat my shorts', 'In theory, Communism works! In theory.', 'Ah, be creative. Instead of making sandwhiches with bread, use Pop-Tarts. Instead of chewing gum, chew bacon.', 'Oh, so they have Internet on computers now!', 'Remember the time he ate my goldfish? And you lied and said I never had a goldfish. Then why did I have the bowl, Bart? Why did I have the bowl?', "But my mom says I'm cool.", 'Ahh! Sweet liquor eases the pain.', "Can't we have one meeting that doesn't end with us digging up a corpse?", "You're turning me into a criminal when all I want to be is a petty thug.", "Shut up, brain. I got friends now. I don't need you anymore.", "Last night's 'Itchy and Scratchy' was, without a doubt, the worst episode ever. 
Rest assured that I was on the Internet within minutes, registering my disgust throughout the world.", 'And this is the snack holder where I can put my beverage or, if you will, cupcake.', "Oh, wow, windows. I don't think I could afford this place.", 'Back in Edinburg, we had a coal miners strike. All we wanted were hats with a wee light on top. Then one day the mine collapsed. No one made it out alive, not even Willie!', "Nothing you say can upset us. We're the MTV generation.", "I don't want to sound like a killjoy, but becuase this is not to my taste I don't think anyone else should be allowed to enjoy it.", "These are my only friends...grown-up nerds like Gore Vidal. And even he's kissed more boys than I ever will.", 'Yeah. Call this an unfair generalization if you must.. but old people are no good at everything', 'Inflammable means flammable? What a country!', 'I believe the children are the future... Unless we stop them now!', 'My eyes! The goggles do nothing!', "When I look at people I don't see colors; I just see crackpot religions.", 'By chilling my loins I increase the chances of impregnating my wife.', "I can't even say the word 'titmouse' without gigggling like a schoolgirl.", 'They taste like...burning.', "Facts are meaningless. You could use facts to prove anything that's even remotely true.", 'Thank you. Come again.', 'Gah, stupid sexy Flanders!', "I think women and seamen don't mix", 'Hi, Super Nintendo Chalmers!', "Doughnuts? I told you I don't like ethnic food", 'Marriage is like a coffin and each kid is another nail.', 'I live in a single room above a bowling alley...and below another bowling alley.', "I used to be with it. But then they changed what it was. Now what I'm with isn't it, and what's it seems scary and wierd. It'll happen to you.", "Hey, I'm the chief here. Bake him away, toys."}


import io
import imageio
from PIL import Image

url = 'https://api.thecatapi.com/v1/images/search'


def get_cat(url):
    """Download one cat picture via an image-search endpoint.

    Queries ``url``, reads the ``'url'`` field of the first element of the
    JSON answer and downloads the picture it points to.

    Args:
        url: Search endpoint to query.

    Returns:
        The downloaded picture opened with ``Image.open`` (``Image`` must be
        ``PIL.Image`` when this runs), or ``None`` if the search request did
        not answer with HTTP 200.

    Raises:
        requests.HTTPError: If the picture download itself fails.
        requests.Timeout: If either request takes longer than 30 seconds.
    """
    api_return = requests.get(url, timeout=30)
    if api_return.status_code != 200:
        return None
    cat_url = api_return.json()[0]['url']
    image_response = requests.get(cat_url, timeout=30)
    # Without this check an error page would be handed to the image decoder
    # and fail with an unrelated "cannot identify image" error.
    image_response.raise_for_status()
    return Image.open(io.BytesIO(image_response.content))


get_cat(url)


import io
import imageio
from PIL import Image

def get_cat_bounded(url, min_width=450, max_width=600, min_height=350,
                    max_height=450, max_attempts=None):
    """Download a cat picture whose dimensions fall inside a size window.

    Keeps querying ``url`` until the first element of the JSON answer reports
    a ``'width'`` strictly between ``min_width`` and ``max_width`` and a
    ``'height'`` strictly between ``min_height`` and ``max_height``, then
    downloads the picture behind its ``'url'`` field.

    Args:
        url: Search endpoint to query.
        min_width: Exclusive lower bound on the reported width.
        max_width: Exclusive upper bound on the reported width.
        min_height: Exclusive lower bound on the reported height.
        max_height: Exclusive upper bound on the reported height.
        max_attempts: Maximum number of search requests, or ``None`` (the
            default) to retry without limit.

    Returns:
        The matching picture opened with ``Image.open`` (``Image`` must be
        ``PIL.Image`` when this runs).

    Raises:
        RuntimeError: If ``max_attempts`` searches produced no match.
        requests.HTTPError: If the picture download itself fails.
        requests.Timeout: If a request takes longer than 30 seconds.
    """
    attempts = 0
    while max_attempts is None or attempts < max_attempts:
        attempts += 1
        api_return = requests.get(url, timeout=30)
        if api_return.status_code != 200:
            # A failed search is retried like a size mismatch rather than
            # returning None, which callers would otherwise collect as if it
            # were an image.
            continue
        candidate = api_return.json()[0]
        if (min_width < candidate['width'] < max_width
                and min_height < candidate['height'] < max_height):
            image_response = requests.get(candidate['url'], timeout=30)
            image_response.raise_for_status()
            return Image.open(io.BytesIO(image_response.content))
    raise RuntimeError(
        'no image within the requested size bounds after {} attempts'
        .format(attempts))


url = 'https://api.thecatapi.com/v1/images/search'
# Gather 50 size-filtered cat pictures (with a progress bar) and write them
# out as one animated GIF at one frame per second.
images = [get_cat_bounded(url) for _ in tqdm_notebook(range(50))]
imageio.mimsave('CATS.gif', images, fps=1)


# Import under an alias: binding IPython's class to the bare name `Image`
# would shadow the `PIL.Image` module that get_cat / get_cat_bounded rely on
# (`Image.open`), breaking them if they are run again afterwards.
from IPython.display import Image as IPyImage

# Read the GIF inside a context manager so the file handle is closed.
with open("CATS.gif", 'rb') as gif_file:
    gif_bytes = gif_file.read()
IPyImage(gif_bytes)


tweets_df = pd.read_csv('tweets_cbarrie.csv', encoding='latin-1',
                        index_col=0, names=['tweet'], skiprows=1)
# Count the tweets (not total occurrences) that contain "sicss" in any
# letter case. na=False makes missing tweet text count as "no match";
# without it a single NaN row turns the mask non-boolean and the row
# selection raises.
mentions_sicss = tweets_df['tweet'].str.lower().str.contains(
    'sicss', regex=False, na=False)
print(int(mentions_sicss.sum()))

10


import pandas as pd
import string
import matplotlib.pyplot as plt
# Load the tweet export as latin-1 text, skipping the file's first line and
# exposing the tweet text under the column name 'tweet'.
# NOTE(review): presumably the CSV holds a row-number column followed by the
# tweet text — verify against the file.
tweets_df = pd.read_csv('tweets_cbarrie.csv', encoding='latin-1',
                        index_col=0, names=['tweet'], skiprows=1)

def return_mention_counts(tweets_df):
    """Count how often each hashtag occurs in a table of tweets.

    Every tweet is lower-cased and split on whitespace. A token counts as a
    hashtag when it starts with ``#`` and is longer than one character; all
    ASCII punctuation is then stripped from it (so ``#SICSS,`` and ``#sicss!``
    both count as ``#sicss``).

    Args:
        tweets_df: DataFrame with a ``'tweet'`` column of tweet texts. Any
            index is accepted; non-string entries (e.g. NaN) are skipped.

    Returns:
        DataFrame indexed by ``'Hashtag'`` with a single column
        ``'Number Mentions'``, ordered from most to least frequent.
    """
    # Function-scope import keeps this block self-contained.
    from collections import Counter

    # Build the translation table once instead of once per hashtag.
    strip_punctuation = str.maketrans('', '', string.punctuation)
    tag_counts = Counter()
    # Iterate over the column values directly; indexing with
    # ``iloc[label - 1]`` is only correct for an index that runs 1..N.
    for tweet_text in tweets_df['tweet']:
        if not isinstance(tweet_text, str):
            continue
        for word in tweet_text.lower().split():
            if not word.startswith('#') or len(word) == 1:
                continue
            cleaned = word.translate(strip_punctuation)
            # Tokens such as "#!!!" have nothing left once punctuation is
            # removed and must not be counted as the tag "#".
            if cleaned:
                tag_counts['#' + cleaned] += 1

    # most_common() is already sorted by descending count.
    df_tags = pd.DataFrame(tag_counts.most_common(),
                           columns=['Hashtag', 'Number Mentions'])
    return df_tags.set_index('Hashtag')

# Bar chart of the ten most frequent hashtags, saved as a PDF.
fig, (ax1) = plt.subplots(nrows=1, ncols=1)
df_tags = return_mention_counts(tweets_df)
ax = df_tags[0:10].plot(kind='bar', figsize=(12,6), edgecolor='k', alpha=0.7, legend=False, ax=ax1, color='#3e8abb')

# '\R' is not a valid escape sequence, so the backslash is written explicitly;
# the label text shown on the axis is unchanged.
ax1.set_ylabel('Mention\\Retweet Count')
ax1.set_title('A. What hashtag does @cbarrie spam the most?', loc='left', y=1.035, fontsize=14)
sns.despine()
plt.savefig('cbarrie_twitters.pdf',
            bbox_inches='tight', dpi=400)


import re
import os
import csv
from bs4 import BeautifulSoup
# Scrape pages p1.htm .. p10.htm of thepeerage.com and write one
# tab-separated row per person heading to 'entire_thepeerage.tsv'.
baseurl = 'http://www.thepeerage.com/'
# School names searched for (case-insensitively, as substrings) in each
# person's narrative text.
# NOTE(review): the apostrophe in "St Paul's School" is a plain ASCII one;
# confirm the site's spelling (e.g. "St." with a full stop) if matches are
# missing.
clarendonlist = ['Charterhouse', 'Eton', 'Harrow', 'Merchant Taylor',
                 'Rugby School', 'Shrewsbury School', "St Paul's School",
                 'Westminster School', 'Winchester College']
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) '
                  'AppleWebKit/537.36 (KHTML, like Gecko) '
                  'Chrome/50.0.2661.102 Safari/537.36'}
rawpath = '.'
# Captures the text between '#i' and the next double quote in serialized
# markup; used to pull the targets of links inside a person's first <ul>.
child_id_pattern = re.compile('#i(.*?)"')


def _parse_sinfo(info):
    """Return (ID, gender, born, died) parsed from a comma-separated line.

    Fields that are not present are reported as 'N/A'. Birth and death
    values are lower-cased, matching the rest of the output file.
    """
    person_id = gender = born = died = 'N/A'
    for trait in info.split(','):
        token = trait.strip()
        lowered = token.lower()
        if lowered == 'm':
            gender = 'M'
        elif lowered == 'f':
            gender = 'F'
        if '#' in token:
            person_id = token.replace('#', '').strip()
        # Prefix checks (rather than substring checks) keep a token such as
        # "d. 5 feb. 1950" from being mistaken for a birth date and keep
        # the date text itself intact.
        if lowered.startswith('b.'):
            born = lowered[2:].strip()
        if lowered.startswith('d.'):
            died = lowered[2:].strip()
    return person_id, gender, born, died


def _match_clarendon(narr_lower):
    """Return the first listed school found in the text, else 'N/A'."""
    for school in clarendonlist:
        if school.lower() in narr_lower:
            return school.lower()
    return 'N/A'


def _match_oxbridge(narr_lower):
    """Classify the text as 'both', 'oxford', 'cambridge' or 'N/A'."""
    has_oxford = 'oxford' in narr_lower
    has_cambridge = 'cambridge' in narr_lower
    if has_oxford and has_cambridge:
        return 'both'
    if has_oxford and 'univ' in narr_lower:
        return 'oxford'
    if has_cambridge and 'univ' in narr_lower:
        return 'cambridge'
    return 'N/A'


# newline='' is what the csv module asks for so that it alone controls line
# endings.
# NOTE(review): no encoding is given, so the platform default is used, while
# the file is later read back with encoding='latin-1' — confirm these agree.
with open(os.path.join(rawpath, 'entire_thepeerage.tsv'), 'w',
          newline='') as fileout:
    peerage_scraper = csv.writer(fileout, delimiter='\t',
                                 lineterminator='\n')
    peerage_scraper.writerow(['Page', 'ID', 'fullname', 'title', 'gender',
                              'born', 'died', 'narr', 'clarendon',
                              'oxbridge', 'child', 'lastedit', 'sources'])
    for page in range(1, 11):
        r = requests.get(baseurl + 'p' + str(page) + '.htm', headers=headers,
                         timeout=30)
        if r.status_code != 200:
            # Report and skip a failed page instead of silently parsing an
            # error document.
            print('Skipping page ' + str(page) + ': HTTP '
                  + str(r.status_code))
            continue
        soup = BeautifulSoup(r.content, 'html.parser')
        for person in soup.find_all("div", {"class": "itp"}):
            # Clean the headings first: links are replaced by their text and
            # footnote superscripts are dropped, so they neither pollute the
            # name nor show up again in the source list below.
            headings = []
            for name in person.find_all("h2", {"class": "sn sect-sn"}):
                for anchor in name.find_all('a'):
                    anchor.unwrap()
                for sup in name.find_all('sup'):
                    sup.extract()
                headings.append(name.text)
            if not headings:
                continue

            # Every field below is computed once per person and starts from
            # an explicit default, so nothing leaks over from the previous
            # person when an element is missing.
            sinfo = person.find("div", {"class": "sinfo sect-ls"})
            ID, gender, born, died = _parse_sinfo(
                sinfo.text if sinfo is not None else '')

            narr_div = person.find("div", {"class": "narr"})
            if narr_div is not None:
                narr = narr_div.text.replace('\xa0', '')
            else:
                narr = 'N/A'
            narr_lower = narr.lower()
            clarendon = _match_clarendon(narr_lower)
            oxbridge = _match_oxbridge(narr_lower)

            last_edit_span = person.find("span", {"class": "field-le-value"})
            last_edit = (last_edit_span.text if last_edit_span is not None
                         else 'N/A')

            # Link texts of all citation links ('.htm#s...'), ';'-separated.
            source = ';'.join(a.text
                              for a in person.find_all('a', href=True)
                              if '.htm#s' in str(a))

            # Link targets found in the items of the person's first list.
            child = ''
            lists = person.find_all('ul')
            if lists and child_id_pattern.search(str(lists)):
                child = ';'.join(
                    child_id_pattern.findall(str(lists[0].find_all('li'))))

            for heading in headings:
                # "Full Name, Title[, ...]": only the first two
                # comma-separated parts are used.
                parts = heading.split(',')
                if len(parts) > 1:
                    fullname = parts[0].strip()
                    title = parts[1].strip()
                else:
                    fullname = heading.strip()
                    title = 'N/A'
                peerage_scraper.writerow([str(page), str(ID), fullname,
                                          title, gender, born, died, narr,
                                          clarendon, oxbridge, child,
                                          last_edit, source])


# Reload the scraped table and display the narrative text of its first row.
peerage_df = pd.read_csv('entire_thepeerage.tsv', sep='\t', encoding='latin-1')
peerage_df.loc[0, 'narr']

"Charles, HRH The Prince of Wales, 2005Photographed at the White House 2Photograph by White HouseCharles Philip Arthur George Mountbatten-Windsor, Prince of Wales was born on 14 November 1948 at Buckingham Palace, St. James's, London, EnglandG.3 He is the son of Philip Mountbatten, 1st Duke of Edinburgh and Elizabeth II Windsor, Queen of the United Kingdom.4 He was baptised on 15 December 1948 at Music Room, Buckingham Palace, St. James's, London, EnglandG. He married, firstly, Lady Diana Frances Spencer, daughter of Edward John Spencer, 8th Earl Spencer and Hon. Frances Ruth Burke Roche, on 29 July 1981 at St. Paul's Cathedral, The City, London, EnglandG.5 He and Lady Diana Frances Spencer were divorced on 28 August 1996.5 He married, secondly, Camilla Rosemary Shand, daughter of Major Bruce Middleton Hope Shand and Hon. Rosalind Maud Cubitt, on 9 April 2005 at Windsor Guildhall, Windsor, Berkshire, EnglandG, in a civil marriage.6 He and Camilla Rosemary Shand were engaged on 8 February 2005.7He gained the title of  HRH Prince Charles of the United Kingdom on 14 November 1948.1 He succeeded as the  Duke of Rothesay [S., 1469] on 6 February 1952.1 He succeeded as the  Lord of the Isles [S., 1469] on 6 February 1952.8 He held the office of Great Steward of Scotland on 6 February 1952.8 He was created  1st Duke of Cornwall [U.K.] on 6 February 1952.1 He succeeded as the  Earl of Carrick [S., 1469] on 6 February 1952.8 He succeeded as the  Baron of Renfrew [S., 1469] on 6 February 1952.8 He was created  HRH The Prince of Wales on 26 July 1958.1 He was created  1st Earl of Chester [U.K.] 
on 26 July 1958.1 He was educated at Cheam School, Headley, Berkshire, EnglandG.3 He was educated at Gordonstoun School, Elgin, Morayshire, ScotlandG.3 He was educated at Geelong Grammer School, Geelong, Victoria, AustraliaG.3 He graduated from Trinity College, Cambridge University, Cambridge, Cambridgeshire, EnglandG, with a Master of Arts (M.A.)3 He was appointed Knight, Order of the Garter (K.G.) in 1968.3 He was appointed Knight Grand Cross, Order of the Bath (G.C.B.) in 1975.3 He was appointed Privy Counsellor (P.C.) in 1977.3 He was appointed Knight, Order of the Thistle (K.T.) in 1977.3 He was appointed Knight, Order of Australia (A.K.) in 1981.3 He was awarded the Queen's Service Order (NZ) (Q.S.O.) in 1983.3 He wrote the book  HRH The Prince of Wales: Watercolours, published 1991 (ASIN: 0316888869.)9 He lived in 1999 at Highgrove House, Doughton, Gloucestershire, EnglandG.3 He co-authored the book  The Garden at Highgrove, published 2001.9 He co-authored the book  Highgrove: Portrait of an Estate, published 2002.9 He co-authored the book  The Elements of Organic Gardening: Highgrove - Clarence House - Birkhall, published 2007.9"


# Reload the scraped table and draw one bar per gender value showing how
# many rows carry it.
peerage_df = pd.read_csv('entire_thepeerage.tsv', sep='\t', encoding='latin-1')
gender_counts = peerage_df.groupby(['gender'])['gender'].count()
gender_counts.plot(kind='bar', edgecolor='k', alpha=0.7,
                   color=['#3e8abb', '#f46d43'])
sns.despine()

Four Exercises to Practice Our Web-Scraping and API-Calls¶

SICSS-Oxford¶

Tuesday 15-06-2021¶

Question 1A. (Basic): Scrape and print out a quote from the Simpsons API.¶

Question 1B. (Advanced): How many quotes are on the API?¶

Question 2A. (Basic): Scrape a picture of a cat from thecatapi and programmatically show it in an ipynb or rmd file.¶

2.b. (Advanced): Can you download a hundred of these pictures and turn them into a .gif?¶

Question 3.a (Basic): Search Twitter for the last two years of cbarrie's tweets.¶

Q3.a. Cont: How many times has he mentioned SICSS?¶

Question 3.b (Advanced): What hashtags does @cbarrie spam the most?¶

Question 4.a (Basic): Can you scrape thepeerage...?¶

Question 4.b (Advanced): What percent of people on the first ten pages are males, and how many are females?¶