Created
June 13, 2018 08:31
-
-
Save vaaaaanquish-xx/ba4749092840f458ce2632aab98285ae to your computer and use it in GitHub Desktop.
imgsearch_on_xonsh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
import os | |
import sys | |
from mimetypes import guess_extension | |
from time import time, sleep | |
from urllib.request import urlopen, Request | |
from urllib.parse import quote | |
from bs4 import BeautifulSoup | |
from PIL import Image | |
from multiprocessing import Pool | |
from multiprocessing import cpu_count | |
def _request(url): | |
req = Request(url) | |
try: | |
with urlopen(req, timeout=3) as p: | |
b_content = p.read() | |
mime = p.getheader('Content-Type') | |
except: | |
return None, None | |
return b_content, mime | |
def _yahoo_img_search(word):
    """Query Yahoo! Japan image search for *word* and return the hit URLs.

    Args:
        word: Search phrase; it is percent-encoded before being placed in
            the query string.

    Returns:
        A list of unique URLs taken from the ``href`` of every
        ``<a target="imagewin">`` element, in page order. The list is empty
        when the result page could not be fetched.
    """
    url = 'http://image.search.yahoo.co.jp/search?n=60&p={}&search.x=1'.format(quote(word))
    byte_content, _ = _request(url)
    if byte_content is None:
        # _request reports failure as None; there is nothing to parse.
        return []
    # errors='replace' keeps a stray invalid byte from aborting the search.
    html = byte_content.decode('UTF-8', errors='replace')
    structured_page = BeautifulSoup(html, 'html.parser')
    img_link_elems = structured_page.find_all('a', attrs={'target': 'imagewin'})
    hrefs = (e.get('href') for e in img_link_elems)
    # dict.fromkeys de-duplicates while keeping first-seen order. Anchors
    # without an href are dropped because callers treat every entry as a
    # URL string.
    return list(dict.fromkeys(h for h in hrefs if h))
def _save_img(t):
    """Download one image and write it into /tmp/img.

    Args:
        t: A tab-separated string whose first field is the output file stem
            and whose second field is the URL to download.

    Returns:
        The path of the written file, or ``''`` when the download failed or
        no file extension could be derived from the Content-Type header.
    """
    fields = t.split('\t')
    payload, content_type = _request(fields[1])
    if payload is None or content_type is None:
        return ''

    # Drop any "; charset=..." parameter before looking up the extension.
    suffix = guess_extension(content_type.split(';')[0])
    if not suffix:
        return ''
    if suffix in ('.jpe', '.jpeg', '.png', '.gif'):
        # Common image types are all stored under a .jpg name.
        suffix = '.jpg'

    destination = os.path.join('/tmp/img', fields[0] + suffix)
    with open(destination, mode='wb') as out:
        out.write(payload)

    # One dot per saved file serves as a progress indicator.
    sys.stdout.write('.')
    sys.stdout.flush()
    return destination
def _img_d(word):
    """Search for *word* and download the first ten hits into /tmp/img.

    Args:
        word: Search phrase passed to ``_yahoo_img_search``.

    Returns:
        A ``(paths, urls)`` tuple. ``urls`` holds the first ten result URLs
        and ``paths`` holds, in the same order, whatever ``_save_img``
        returned for each of them (``''`` for a failed download). When the
        search yields fewer than ten results, ``([], [])`` is returned.
    """
    data_dir = '/tmp/img'
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(data_dir, exist_ok=True)
    t = _yahoo_img_search(word)
    if len(t) < 10:
        print('Not Found 10 IMG.')
        return [], []
    t = t[:10]
    # _save_img expects "<index>\t<url>"; the index becomes the file stem.
    urls = ['{}\t{}'.format(i, x) for i, x in enumerate(t)]
    # Pool(0) raises ValueError, so never go below one worker on a
    # single-core machine.
    workers = max(1, cpu_count() - 1)
    # map() blocks until every download has finished; the context manager
    # then tears the worker processes down instead of leaving them behind.
    with Pool(workers) as p:
        a = p.map(_save_img, urls)
    print('saved images.')
    return a, t
def _imgs(word):
    """xonsh alias: search images, preview them, copy the chosen URL.

    The first ten hits for the search phrase are tiled into a 5x2 contact
    sheet (250x250 pixels per tile) saved as /tmp/h.jpg and displayed with
    ``imgcat``. The user is then asked for a tile number (1-10, counted
    left to right, top row first) and the matching URL is piped to
    ``pbcopy``.

    Args:
        word: The alias argument list; the items are joined with spaces to
            form the search phrase.
    """
    word = ' '.join(word)
    paths, urls = _img_d(word)
    if not paths or not urls:
        print('Bad input.')
        return

    tile = 250
    columns = 5
    img = Image.new('RGB', (tile * columns, tile * 2))
    for j, path in enumerate(paths[:10]):
        if not path:
            # The download failed; leave this tile blank so the numbering
            # of the remaining tiles still matches ``urls``.
            continue
        try:
            im = Image.open(path).resize((tile, tile))
        except (OSError, ValueError):
            # Not a readable image (e.g. an HTML error page); skip it.
            continue
        row, col = divmod(j, columns)
        img.paste(im, (tile * col, tile * row))
    img.save("/tmp/h.jpg")
    imgcat /tmp/h.jpg

    img_num = input('image number(1~10) : ')
    try:
        selected = int(img_num)
    except ValueError:
        print('Bad input.')
        return
    if not 1 <= selected <= len(urls):
        print('Bad input.')
        return
    # The prompt is 1-based while ``urls`` is 0-based.
    echo -n @(urls[selected - 1]) | pbcopy


# Register the function as the ``imgs`` command in the xonsh session.
aliases['imgs'] = _imgs
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment