Skip to content

Instantly share code, notes, and snippets.

@vaaaaanquish-xx
Created June 13, 2018 08:31
Show Gist options
  • Save vaaaaanquish-xx/ba4749092840f458ce2632aab98285ae to your computer and use it in GitHub Desktop.
Save vaaaaanquish-xx/ba4749092840f458ce2632aab98285ae to your computer and use it in GitHub Desktop.
imgsearch_on_xonsh
# -*- coding: utf-8 -*-
import os
import sys
from mimetypes import guess_extension
from time import time, sleep
from urllib.request import urlopen, Request
from urllib.parse import quote
from bs4 import BeautifulSoup
from PIL import Image
from multiprocessing import Pool
from multiprocessing import cpu_count
def _request(url):
req = Request(url)
try:
with urlopen(req, timeout=3) as p:
b_content = p.read()
mime = p.getheader('Content-Type')
except:
return None, None
return b_content, mime
def _yahoo_img_search(word):
    """Query Yahoo! Japan image search for *word* and collect result links.

    Args:
        word: Search phrase; it is percent-encoded before being placed in
            the query string.

    Returns:
        A list of the unique ``href`` values, in page order, of every
        ``<a target="imagewin">`` element on the result page.  The list is
        empty when the result page could not be downloaded.
    """
    url = 'http://image.search.yahoo.co.jp/search?n=60&p={}&search.x=1'.format(quote(word))
    byte_content, _ = _request(url)
    if byte_content is None:
        # _request reports every download failure as None; without this
        # guard the decode below would raise AttributeError.
        return []

    # errors='replace' keeps a single stray byte from aborting the search.
    html = byte_content.decode('UTF-8', errors='replace')
    structured_page = BeautifulSoup(html, 'html.parser')
    img_link_elems = structured_page.find_all('a', attrs={'target': 'imagewin'})

    # Order-preserving de-duplication; anchors without an href are skipped
    # so that None never ends up in the URL list.
    img_urls = []
    seen = set()
    for elem in img_link_elems:
        href = elem.get('href')
        if not href or href in seen:
            continue
        seen.add(href)
        img_urls.append(href)
    return img_urls
def _save_img(t):
    """Download one image and store it under ``/tmp/img``.

    Args:
        t: A ``'<index>\\t<url>'`` string.  The index becomes the file's
            base name; the URL is what gets downloaded.

    Returns:
        The path of the written file, or ``''`` when the download failed,
        the body was empty, no file extension could be derived from the
        Content-Type, or the file could not be written.
    """
    # maxsplit=1 keeps a URL that itself contains a tab intact.
    index, url = t.split('\t', 1)
    img, mime = _request(url)
    if not mime or not img:
        return ''

    # Drop parameters such as "; charset=..." before the lookup.
    ext = guess_extension(mime.split(';')[0].strip())
    if ext in ('.jpe', '.jpeg', '.png', '.gif'):
        # Normalise the common image extensions to a single suffix.
        ext = '.jpg'
    if not ext:
        return ''

    result_file = os.path.join('/tmp/img', index + ext)
    try:
        with open(result_file, mode='wb') as f:
            f.write(img)
    except OSError:
        # A single unwritable file must not abort the other downloads that
        # run through the same Pool.map call; report it like any other
        # failed download.
        return ''

    # Progress indicator: one dot per saved file.
    sys.stdout.write('.')
    sys.stdout.flush()
    return result_file
def _img_d(word):
    """Search for *word* and download the first ten hits into ``/tmp/img``.

    Args:
        word: Search phrase passed on to ``_yahoo_img_search``.

    Returns:
        ``(paths, urls)`` where ``urls`` are the first ten result URLs and
        ``paths`` are the corresponding ``_save_img`` results, in the same
        order.  ``([], [])`` when the search yields fewer than ten URLs.
    """
    data_dir = '/tmp/img'
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(data_dir, exist_ok=True)

    t = _yahoo_img_search(word)
    if len(t) < 10:
        print('Not Found 10 IMG.')
        return [], []
    t = t[:10]

    # Each job carries its index so the worker can name the file after it.
    urls = [str(i) + '\t' + x for i, x in enumerate(t)]

    # Leave one core free, but never ask for zero workers: Pool(0) raises
    # ValueError on single-core machines.
    workers = max(1, cpu_count() - 1)
    with Pool(workers) as p:
        a = p.map(_save_img, urls)

    print('saved images.')
    return a, t
def _imgs(word):
    """xonsh alias body: search images, preview them, copy one URL.

    The ten downloaded images are tiled into a 5 x 2 contact sheet
    (250 x 250 px per tile) saved as ``/tmp/h.jpg`` and shown with
    ``imgcat``.  The user then picks a tile by number and the matching
    source URL is piped to ``pbcopy``.

    Args:
        word: List of command-line words; they are joined with spaces to
            form the search phrase.
    """
    word = ' '.join(word)
    paths, urls = _img_d(word)
    if not paths or not urls:
        print('Bad input.')
        return

    img = Image.new('RGB', (250 * 5, 500))
    for j, path in enumerate(paths[:10]):
        if not path:
            # _save_img returns '' for a failed download; leave that tile
            # blank so the numbering of the other tiles stays correct.
            continue
        try:
            with Image.open(path) as src:
                im = src.resize((250, 250))
        except OSError:
            # Missing or undecodable file: skip the tile instead of
            # aborting the whole preview.
            continue
        row, col = divmod(j, 5)
        img.paste(im, (250 * col, 250 * row))
    img.save("/tmp/h.jpg")
    imgcat /tmp/h.jpg

    img_num = input('image number(1~10) : ')
    try:
        # The prompt is 1-based, the list is 0-based.
        idx = int(img_num) - 1
    except ValueError:
        print('Bad input.')
        return
    if not 0 <= idx < len(urls):
        # Explicit range check: 0 or negative input would otherwise wrap
        # around and silently copy the wrong URL.
        print('Bad input.')
        return
    echo -n @(urls[idx]) | pbcopy
# Register _imgs as the xonsh command `imgs`; the words typed after the
# command are what _imgs joins into the search phrase.
aliases['imgs'] = _imgs
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment