Last active
March 9, 2018 16:13
-
-
Save ijharulislam/5575566328a547506f0f8483b06ee348 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Docomo | |
input_data={ | |
"format": [ | |
{"regex_item": "Null", "regex_match": "Null", "remove_tag": "1", "item_name": "name", "xpath": "/html/body/div[2]/div[3]/div[1]/main/article/section[1]/div[1]/h2"}, | |
{"regex_item": "${1}", "regex_match": "([\\d\\-\\\u2212\\\u2010]+)", "remove_tag": "1", "item_name": "zip", "xpath": "/html/body/div[2]/div[3]/div[1]/main/article/section[1]/div[2]/div[1]/div/div[2]/figure/table/tbody/tr[1]/td/text()[1]"}, | |
{"regex_item": "Null", "regex_match": "Null", "remove_tag": "1", "item_name": "address", "xpath": "/html/body/div[2]/div[3]/div[1]/main/article/section[1]/div[2]/div[1]/div/div[2]/figure/table/tbody/tr[1]/td/text()[2]"}, | |
{"regex_item": "Null", "regex_match": "Null", "remove_tag": "1", "item_name": "tel", "xpath": "/html/body/div[2]/div[3]/div[1]/main/article/section[1]/div[2]/div[1]/div/div[2]/figure/table/tbody/tr[1]/td/span[1]"}], | |
"coordination": [ | |
{ | |
"srid": "", | |
"regex_match": "ll=([\\-\\d\\.]+),([\\-\\d\\.]+).+", | |
"latlon_xpath": "//*[@id=\"map\"]/div[2]/div/div[2]/a", | |
"mapurl_regex": "", | |
"tinymapurl_regex": "", | |
"lat": "$1", | |
"mapurl_xpath": "", "lon": "$2"}], "target": [{"url": "https://www.nttdocomo.co.jp/support/shop/search/shop.html?t=s&c%5B%5D=%E5%AE%97%E8%B0%B7%E5%9C%B0%E6%96%B9&sc=area&pg=1&id=0135130101200&map=g&ot=s&p=012", "id": 1}, {"url": "https://www.nttdocomo.co.jp/support/shop/search/shop.html?t=s&c%5B%5D=%E8%95%A8%E5%B8%82&sc=area&pg=1&id=0354124100300&map=g&ot=s&p=110", "id": 2}, {"url": "https://www.nttdocomo.co.jp/support/shop/search/shop.html?t=s&c%5B%5D=%E5%9B%9B%E8%A1%97%E9%81%93%E5%B8%82&sc=area&pg=1&id=0300305415600&map=g&ot=s&p=120", "id": 3}, {"url": "https://www.nttdocomo.co.jp/support/shop/search/shop.html?t=s&c%5B%5D=%E6%A8%AA%E9%A0%88%E8%B3%80%E5%B8%82&sc=area&pg=1&id=0300302070400&map=g&ot=s&p=142", "id": 4}, {"url": "https://www.nttdocomo.co.jp/support/shop/search/shop.html?t=s&c%5B%5D=%E6%AD%A6%E8%94%B5%E6%9D%91%E5%B1%B1%E5%B8%82&sc=area&pg=1&id=0300306946200&map=g&ot=s&p=132", "id": 5}, {"url": "https://www.nttdocomo.co.jp/support/shop/search/shop.html?t=s&c%5B%5D=%E6%9C%AC%E5%AE%AE%E5%B8%82&sc=area&pg=1&id=0200201086000&map=g&ot=s&p=070", "id": 6}, {"url": "https://www.nttdocomo.co.jp/support/shop/search/shop.html?t=s&c%5B%5D=%E7%B1%B3%E6%B2%A2%E5%B8%82&sc=area&pg=1&id=0200200553700&map=g&ot=s&p=060", "id": 7}, {"url": "https://www.nttdocomo.co.jp/support/shop/search/shop.html?t=s&c%5B%5D=%E9%99%B8%E5%89%8D%E9%AB%98%E7%94%B0%E5%B8%82&sc=area&pg=1&id=0296160192700&map=g&ot=s&p=030", "id": 8}, {"url": "https://www.nttdocomo.co.jp/support/shop/search/shop.html?t=s&c%5B%5D=%E7%B1%B3%E6%B2%A2%E5%B8%82&sc=area&pg=1&id=0292120106700&map=g&ot=s&p=060", "id": 9}, {"url": "https://www.nttdocomo.co.jp/support/shop/search/shop.html?t=s&c%5B%5D=%E6%A8%AA%E6%89%8B%E5%B8%82&sc=area&pg=1&id=0295150152900&map=g&ot=s&p=050", "id": 10}]} | |
# Hotel-Livemax | |
input_data={"target": [ | |
{"id": 1, "url": "https://www.hotel-livemax.com/osaka/umedadoyama/access"}, | |
{"id": 2, "url": "https://www.hotel-livemax.com/kagoshima/kagoshima/access"}, | |
{"id": 3, "url": "https://www.hotel-livemax.com/tokyo/shiomist/access"}, | |
{"id": 4, "url": "https://www.hotel-livemax.com/tokyo/bakurocho/access"}, | |
{"id": 5, "url": "https://www.hotel-livemax.com/tokyo/kitafuchu/access"}, | |
{"id": 6, "url": "https://www.hotel-livemax.com/aichi/nagoya/access"}, | |
{"id": 7, "url": "https://www.hotel-livemax.com/chiba/mihama/access"}, | |
{"id": 8, "url": "https://www.hotel-livemax.com/osaka/esaka/access"}, | |
{"id": 9, "url": "https://www.hotel-livemax.com/kanagawa/sagamiharast/access"}, | |
{"id": 10, "url": "https://www.hotel-livemax.com/ishikawa/kanazawast/access"}], | |
"coordination": [ | |
{ | |
"tinymapurl_regex": "", | |
"lon": "${1}", | |
"srid": "", | |
"lat": "${2}", | |
"latlon_xpath": "", | |
"regex_match": "ll=([\\-\\d\\.]+),([\\-\\d\\.]+).+", | |
"mapurl_xpath": "", | |
"mapurl_regex": ""}], "format": [{"remove_tag": "1", "regex_match": "Null", "xpath": "//*[@id=\"page_navi\"]/ul/li[3]/a", "item_name": "name", "regex_item": "Null"}, {"remove_tag": "1", "regex_match": "\u3012([\\d\\-\\\u2212\\\u2010]+)", "xpath": "//*[@id=\"top_info\"]/dl/dd[1]/text()[1]", "item_name": "zip", "regex_item": "${1}"}, {"remove_tag": "1", "regex_match": "\u3012[\\d\\-\\\u2212\\\u2010]+\\s*([^<]+)", "xpath": "//*[@id=\"top_info\"]/dl/dd[1]/text()[1]", "item_name": "address", "regex_item": "${1}"}, {"remove_tag": "1", "regex_match": "TEL\\s*[:\\.\\/]*\\s*([\\d\\-\\)\\(]+)", "xpath": "//*[@id=\"top_info\"]/dl/dd[1]", "item_name": "tel", "regex_item": "${1}"}]} | |
# Sun Route | |
input_data={"coordination": [ | |
{ | |
"regex_match": "https:\\/\\/maps\\.google\\.com\\/maps\\?.*ll=([\\-\\d\\.]+),([\\-\\d\\.]+)", | |
"mapurl_regex": "", | |
"lat": "${1}", | |
"lon": "${2}", | |
"latlon_xpath": "", | |
"mapurl_xpath": "", "srid": "", "tinymapurl_regex": ""}], "target": [{"id": 1, "url": "https://www.sunroute.jp/HotelInfo/tohoku/patiogoshogawara/index.html"}, {"id": 2, "url": "https://www.sunroute.jp/HotelInfo/chugoku/tokuyama/index.html"}, {"id": 3, "url": "https://www.sunroute.jp/HotelInfo/kinki/osakanamba/index.html"}, {"id": 4, "url": "https://www.sunroute.jp/HotelInfo/tohoku/fukushima/index.html"}, {"id": 5, "url": "https://www.sunroute.jp/HotelInfo/tokyo_kanagawa/takadanobaba/index.html"}, {"id": 6, "url": "https://www.sunroute.jp/HotelInfo/koshinetsu_hokuriku/nagano/index.html"}, {"id": 7, "url": "https://www.sunroute.jp/HotelInfo/tokyo_kanagawa/ginza/index.html"}, {"id": 8, "url": "https://www.sunroute.jp/HotelInfo/koshinetsu_hokuriku/skyhoteluozu/index.html"}, {"id": 9, "url": "https://www.sunroute.jp/HotelInfo/koshinetsu_hokuriku/ueda/index.html"}, {"id": 10, "url": "https://www.sunroute.jp/HotelInfo/kanto/gardenpalace/index.html"}], "format": [{"regex_item": "Null", "regex_match": "Null", "remove_tag": "1", "item_name": "name", "xpath": "//*[@id=\"slides\"]/div[2]/ol/li[4]/text()"}, {"regex_item": "${1}", "regex_match": "\u3012\\s*([\\d\\-\\\u2212\\\u2010]+)", "remove_tag": "1", "item_name": "zip", "xpath": "//address"}, {"regex_item": "${1}", "regex_match": "\u3012\\s*[\\d\\-\\\u2212\\\u2010]+\\s*(.+)$", "remove_tag": "1", "item_name": "address", "xpath": "//address"}, {"regex_item": "${1}", "regex_match": "TEL\\s*[:\\.\\/]*\\s*([\\d\\-\\)\\(]+)", "remove_tag": "1", "item_name": "tel", "xpath": "//pao2"}]} | |
# Super Hotel | |
input_data={"target": [{"url": "http://www.superhotel.co.jp/s_hotels/tottorikita/tottorikita.html", "id": 1}, {"url": "http://www.superhotel.co.jp/s_hotels/hirose/hirose.html", "id": 2}, {"url": "http://www.superhotel.co.jp/s_hotels/ueno/ueno.html", "id": 3}, {"url": "http://www.superhotel.co.jp/s_hotels/fujinomiya/", "id": 4}, {"url": "http://www.superhotel.co.jp/s_hotels/omiya/omiya.html", "id": 5}, {"url": "http://www.superhotel.co.jp/s_hotels/kitami/kitami.html", "id": 6}, {"url": "http://www.superhotel.co.jp/s_hotels/sendai/sendai.html", "id": 7}, {"url": "http://www.superhotel.co.jp/s_hotels/kushiro/kushiro.html", "id": 8}, {"url": "http://www.superhotel.co.jp/s_hotels/akihabara/", "id": 9}, {"url": "http://www.superhotel.co.jp/s_hotels/hachinohe/hachinohe.html", "id": 10}], "format": [{"regex_match": "Null", "xpath": "//p[@class=\"hotel_name__ja\"]", "item_name": "name", "remove_tag": "1", "regex_item": "Null"}, {"regex_match": "\u3012\\s*([\\d\\-\\\u2212\\\u2010]+)", "xpath": "//*[@id=\"v_address\"]", "item_name": "zip", "remove_tag": "1", "regex_item": "${1}"}, {"regex_match": "\u3012\\s*[\\d\\-\\\u2212\\\u2010]+\\s*(.+)\\s*TEL", "xpath": "//*[@id=\"v_address\"]", "item_name": "address", "remove_tag": "1", "regex_item": "${1}"}, {"regex_match": "TEL[\uff1a:]([\\d\\-]+)", "xpath": "//p[@class=\"tel\"]", "item_name": "tel", "remove_tag": "1", "regex_item": "${1}"}], | |
"coordination": [ | |
{"regex_match": "navitime\\.geo\\.LatLng\\(([\\d\\-\\.]+),([\\d\\-\\.]+)\\)", | |
"tinymapurl_regex": "", | |
"mapurl_regex": "", | |
"latlon_xpath": "/html/body/script[13]/text()", | |
"srid": "", | |
"mapurl_xpath": "", "lat": "${1}", "lon": "${2}"}]} | |
# Route-Inn | |
input_data={"target": [ | |
{"url": "https://www.route-inn.co.jp/search/hotel/parking_hotel_id_584", "id": 1}, | |
{"url": "https://www.route-inn.co.jp/search/hotel/parking_hotel_id_210", "id": 2}, | |
{"url": "https://www.route-inn.co.jp/search/hotel/parking_hotel_id_565", "id": 3}, | |
{"url": "https://www.route-inn.co.jp/search/hotel/parking_hotel_id_527", "id": 4}, | |
{"url": "https://www.route-inn.co.jp/search/hotel/parking_hotel_id_66", "id": 5}, | |
{"url": "https://www.route-inn.co.jp/search/hotel/parking_hotel_id_84", "id": 6}, | |
{"url": "https://www.route-inn.co.jp/search/hotel/parking_hotel_id_571", "id": 7}, | |
{"url": "https://www.route-inn.co.jp/search/hotel/parking_hotel_id_533", "id": 8}, | |
{"url": "https://www.route-inn.co.jp/search/hotel/parking_hotel_id_560", "id": 9}, | |
{"url": "https://www.route-inn.co.jp/search/hotel/parking_hotel_id_649", "id": 10}], | |
"format": [{"regex_item": "Null", "regex_match": "Null", "remove_tag": "1", "xpath": "//*[@id=\"crumbs\"]/li[3]/a", "item_name": "name"}, {"regex_item": "${1}", "regex_match": "\u3012\\s*([\\d\\-\\\u2212\\\u2010]+)", "remove_tag": "1", "xpath": "//*[@id=\"page_topbox\"]/p[1]", "item_name": "zip"}, {"regex_item": "${1}", "regex_match": "\u3012\\s*[\\d\\-\\\u2212\\\u2010]+\\s*(.+)$", "remove_tag": "1", "xpath": "//*[@id=\"page_topbox\"]/p[1]", "item_name": "address"}, {"regex_item": "Null", "regex_match": "Null", "remove_tag": "1", "xpath": "//*[@id=\"page_topbox\"]/p[1]/span/strong", "item_name": "tel"}, {"regex_item": "${1}", "regex_match": "^(?:\u3010\u516c\u5f0f\u3011)*(.+)\\s*[\\\uff5c\\|]", "remove_tag": "1", "xpath": "/html/head/title", "item_name": "name"}, {"regex_item": "${1}", "regex_match": "\u3012\\s*([\\d\\-\\\u2212\\\u2010]+)", "remove_tag": "1", "xpath": "//*[@id=\"page_topbox\"]", "item_name": "zip"}, {"regex_item": "${1}", "regex_match": "\u3012\\s*[\\d\\-\\\u2212\\\u2010]+\\s*(.+)$", "remove_tag": "1", "xpath": "//*[@id=\"page_topbox\"]", "item_name": "address"}, {"regex_item": "${1}", "regex_match": "TEL:\\s*([\\d\\-\\)\\(]+)", "remove_tag": "1", "xpath": "//*[@id=\"page_topbox\"]", "item_name": "tel"}], "coordination": [{"srid": "", "latlon_xpath": "/html/body", "mapurl_xpath": "", "lat": "${1}", "lon": "${2}", "regex_match": "navitime\\.geo\\.LatLng\\(([\\d\\-\\.]+),([\\d\\-\\.]+)\\)", "mapurl_regex": "", "tinymapurl_regex": ""}]} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment