# thot-detector/not_thot_downloader.py

import urllib.request

import cv2
import numpy as np
import os

def store_raw_images(
    lnk="http://www.image-net.org/api/text/imagenet.synset.geturls?wnid=n04081281",
    out_dir="thots_false",
    start=1273,
    stop=2000,
    size=(100, 100),
):
    """Download the images listed at *lnk* and store them as small grayscale JPEGs.

    The resource at *lnk* is read as text and treated as a list of image
    URLs, one per line.  Each URL is downloaded to ``<out_dir>/<n>.jpg``,
    re-read in grayscale, resized to *size* and written back over the
    downloaded file.  Failures on a single URL are printed and skipped so
    that one bad link does not abort the whole run.

    Args:
        lnk: URL of the plain-text list of image URLs.
        out_dir: Directory that receives the images.  It is also where the
            reference placeholder ``blank.jpg`` is looked up.
        start: Counter value before the first URL; the first file written is
            ``start + 1``.  The counter advances once per attempted URL, so
            failed or discarded downloads leave gaps in the numbering.
        stop: Exclusive upper bound for the counter; processing ends as soon
            as the counter reaches it.
        size: ``(width, height)`` passed to ``cv2.resize``.

    Returns:
        None.  Progress and errors are reported with ``print``.

    Raises:
        Exception: whatever ``urlopen``/``decode`` raise while fetching the
            URL list itself; only per-image errors are swallowed.
    """
    # Other synsets previously used with this script:
    #   thots:     http://www.image-net.org/api/text/imagenet.synset.geturls?wnid=n09972458
    #   not thots: http://www.image-net.org/api/text/imagenet.synset.geturls?wnid=n00017222

    # cv2.imread returns None instead of raising when the file is missing or
    # undecodable, so the reference shape has to be guarded explicitly.
    blank_img = cv2.imread(os.path.join(out_dir, "blank.jpg"), cv2.IMREAD_GRAYSCALE)
    blank_shape = None if blank_img is None else blank_img.shape
    if blank_shape is None:
        print("Reference blank image not readable, blank detection disabled")

    os.makedirs(out_dir, exist_ok=True)

    # Close the HTTP response deterministically once the list is read.
    with urllib.request.urlopen(lnk) as response:
        listing = response.read().decode()

    # splitlines() + strip() drops trailing "\r" and empty lines, which would
    # otherwise burn a file index on a URL that can never be fetched.
    image_urls = [line.strip() for line in listing.splitlines() if line.strip()]
    total = len(image_urls)

    n = start
    for image_url in image_urls:
        n += 1
        if n >= stop:
            break
        path = os.path.join(out_dir, str(n) + ".jpg")
        print("\n")
        print(str(n) + "/" + str(total))
        print(image_url)
        try:
            urllib.request.urlretrieve(image_url, path)
            img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
            if img is None:
                # The server answered with something that is not a decodable
                # image (e.g. an HTML error page); do not keep it around.
                print("Unreadable image, deleting...")
                os.remove(path)
                continue
            # Size-only comparison: any image whose dimensions equal those of
            # blank.jpg is treated as the placeholder and discarded.
            if blank_shape is not None and img.shape == blank_shape:
                print("Blank Image Detected, deleting...")
                os.remove(path)
                continue
            rs_img = cv2.resize(img, size)
            # cv2.imwrite signals failure through its return value.
            if not cv2.imwrite(path, rs_img):
                print("Could not write " + path)
                continue
            print("Success")
        except Exception as e:
            # Deliberate best-effort: dead links, HTTP errors and bad files
            # are reported and the loop moves on to the next URL.
            print(str(e))

# Run the download only when this file is executed as a script, so that
# importing the module does not start network and disk activity.
if __name__ == "__main__":
    store_raw_images()