Using Image Recognition to Find Bird Posters That Don't Post Enough Birds

bird-nobird

I got mad love for bird twitter. I follow several hundred bird accounts, mostly found through the suggestion box. Once Twitter knows you're down with birds, birds are all it'll recommend to you until you've exhausted a vast swath of bird twitter. I've spent longer than I'd like to admit just going through the "who to follow" box and following everyone with a bird avatar. But eventually I ran into a problem: some of these birds don't actually post birds, they just post random things and pictures of ice cream. Normally this wouldn't be a problem, but since roughly half of these bird accounts are Japanese, my timeline has turned into an unmanageable mess of moonrunes.

Sorry, I love you bird posters, but some of you just don't post enough birds. So, I wrote a Python script that will do the following:

  1. Use the Twitter API to grab a list of the accounts I follow
  2. For every account I follow, download the profile image to a directory
  3. For every profile image, use image recognition to determine if the avatar contains a bird
  4. Once I have a list of every bird I follow, loop over every bird and pull their 30 most recent tweets
  5. For every tweet, if the tweet contains an image, use image recognition to determine if the image contains a bird
  6. Score the account out of 30 based on the number of birds it's posted
  7. Rank all birds on their bird posting frequency and save the result to a file.

I can then go through that list and manually decide which accounts to unfollow. It makes the process a whole lot easier, and the image recognition API is pretty accurate. Future improvements of this script may include considering the number of tweets a bird has, to generate a score representing how much non-bird posts clutter my timeline as a result of following a certain account.

Here's the script:

#!/usr/bin/python

# Twitter API credentials. All are intentionally blank placeholders here and
# must be filled in before running the script.
app_name = ""       # application name (only referenced by the oauth_dance note)
c_key = ""          # consumer key handed to twitter.oauth.OAuth
c_sec = ""          # consumer secret handed to twitter.oauth.OAuth
access_key = ""     # access token handed to twitter.oauth.OAuth
access_secret = ""  # access token secret handed to twitter.oauth.OAuth

import twitter
import sys
import code
import urllib.request
import requests
import operator
from os import listdir

def twitter_auth():
    """Return a `twitter.Twitter` client authenticated with the module credentials.

    The OAuth object is built from the module-level `access_key`,
    `access_secret`, `c_key` and `c_sec` values, in that order.
    """
    credentials = twitter.oauth.OAuth(access_key, access_secret, c_key, c_sec)
    return twitter.Twitter(auth=credentials)

def download_profile_images(tw, screen_name="Gexcolo"):
    """Download the avatar of every account `screen_name` follows.

    Images are saved as ``downloads/<screen_name>.jpg`` relative to the
    current working directory; the directory is created if it is missing.

    Args:
        tw: Authenticated Twitter client exposing ``friends.ids`` and
            ``users.lookup``.
        screen_name: Account whose friend list (the accounts it follows) is
            fetched. Defaults to the original hard-coded account.

    Returns:
        None. Downloads are best-effort: a failed download is reported on
        stdout and skipped, it does not abort the run.
    """
    # Function-scope import: the file itself only imports `listdir` from os.
    import os

    # NOTE: access tokens can be obtained on first use with
    # twitter.oauth_dance(app_name, c_key, c_sec).

    print("Finding your followers... ", end='')
    friends = tw.friends.ids(screen_name=screen_name)
    print("Done")

    friend_ids = friends['ids']
    print(len(friend_ids))

    # Look users up in batches of 100 ids per request
    # (presumably the users/lookup per-call limit -- TODO confirm).
    chunks = [friend_ids[i : i + 100] for i in range(0, len(friend_ids), 100)]

    profile_images = []

    for chunk in chunks:
        print("Loading chunk of users... ", end='')
        resp = tw.users.lookup(user_id = chunk)
        print("Done")

        for user in resp:
            # Drop the '_normal' marker from the avatar URL
            # (presumably selects the full-size image -- TODO confirm).
            profile_images.append([user['screen_name'],user['profile_image_url'].replace('_normal','')])

    # Without this, every urlretrieve below fails on a fresh checkout and the
    # failures used to be swallowed silently.
    os.makedirs('downloads', exist_ok=True)

    for name, image_url in profile_images:
        try:
            urllib.request.urlretrieve(image_url, 'downloads/%s.jpg' % (name))
        except Exception as err:
            # Still best-effort per avatar, but no longer a bare `except`:
            # Ctrl-C / SystemExit propagate and the failure is visible.
            print("Could not download avatar for %s: %s" % (name, err))
            continue

def is_bird(filename):
    """Ask the keywording web service whether the image at `filename` shows a bird.

    The file is uploaded as the multipart field ``data``. The JSON reply is
    expected to hold the keyword collection under ``['keywords']['keywords']``.

    Args:
        filename: Path of the image file to upload.

    Returns:
        True if ``'Bird'`` is found in the returned keywords, else False.

    Raises:
        OSError: If the file cannot be opened.
        KeyError: If the reply lacks the ``keywords`` entries.
        Network and JSON-decoding errors from `requests` propagate unchanged.
    """
    url='https://services1.microstock.pro/keywording/keywords'

    # Open the image in a context manager so the handle is closed once the
    # upload finishes (the original leaked one handle per call).
    with open(filename,'rb') as image:
        resp = requests.post(url, files={'data': image})
    js = resp.json()

    return 'Bird' in js['keywords']['keywords']

def find_birds():
    """Classify every file in ``downloads/`` and collect the bird accounts.

    Each file is passed to `is_bird`; the verdict is printed per file.

    Returns:
        List of file names with ``.jpg`` removed, one entry per file that
        was classified as a bird, in `listdir` order.
    """
    found = []

    for entry in listdir('downloads'):
        if not is_bird('downloads/%s' % (entry)):
            print("%s is not a bird" % (entry))
            continue

        print("%s is a bird" % (entry))
        found.append(entry.replace('.jpg',''))

    return found

def how_many_birds(tw, bird):
    """Count how many of `bird`'s recent tweets carry a bird picture.

    Requests up to 30 timeline entries (retweets included). For each tweet
    that has media, only the first media item is downloaded to ``tmp.jpg``
    and classified with `is_bird`.

    Args:
        tw: Authenticated Twitter client exposing ``statuses.user_timeline``.
        bird: Screen name of the account to score.

    Returns:
        Number of tweets whose first media item was classified as a bird.
    """
    recent = tw.statuses.user_timeline(screen_name=bird, include_rts=1, count=30)
    tally = 0

    for status in recent:
        entities = status['entities']
        if 'media' in entities:
            # Only the first attached media item is examined.
            media_url = entities['media'][0]['media_url']
            print("Calculating if %s's tweet is a bird... " % (bird), end='')
            urllib.request.urlretrieve(media_url, 'tmp.jpg')
            verdict = is_bird('tmp.jpg')
            print("yep" if verdict else "nope")
            if verdict:
                tally += 1

    print("%s has %d bird posts" % (bird, tally))
    return tally

def main():
    """Score every bird account and write the ranking to ``scores.txt``.

    Steps: authenticate, classify the avatars already present in
    ``downloads/``, count bird posts per bird account, then print and save
    one ``<profile url>\\t<score>`` line per account, lowest score first.
    """
    tw = twitter_auth()

    # Avatar download is disabled; find_birds() works on whatever is already
    # in downloads/. Uncomment to (re)fetch the avatars first.
    #download_profile_images(tw)

    birds = find_birds()

    bird_score = {}
    for bird in birds:
        bird_score[bird] = how_many_birds(tw,bird)

    # Ascending by score, so the accounts posting the fewest birds come first.
    ranked = sorted(bird_score.items(), key=operator.itemgetter(1))

    # Context manager guarantees the report is flushed and closed
    # (the original never closed the file).
    with open('scores.txt','w') as f:
        for name, score in ranked:
            a = "https://twitter.com/%s\t%d" % (name, score)
            print(a)
            f.write(a + "\n")

if __name__ == "__main__":
    main()