Last active
February 20, 2016 20:17
-
-
Save lethe2211/6107357d0407cd88c834 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
require 'nokogiri' | |
require 'open-uri' | |
# Wrapper class for working with the Microsoft Academic Search API.
#
# All methods are class methods. The public lookups take a paper ID, fetch the
# matching feed from the API and extract fields from it with Nokogiri.
#
# NOTE(review): responses are parsed with Nokogiri's HTML parser and matched
# with lowercase selectors (e.g. 'dstid'); presumably this relies on the HTML
# parser's tag-name normalisation -- confirm before switching to Nokogiri::XML.
class MsacademicApiWrapper
  # Root URL of the API; a schema name and a $filter query are appended to it.
  API_BASE_URL = 'https://api.datamarket.azure.com/MRC/MicrosoftAcademic/v2/'

  # Returns the title of the paper with the given ID.
  # Returns '' when the response contains no 'content title' element.
  def self.get_title(id)
    first_text(get_paper_info(id), 'content title')
  end

  # Returns the publication year of the paper with the given ID.
  # Returns '' when the response contains no 'year' element.
  def self.get_year(id)
    first_text(get_paper_info(id), 'year')
  end

  # Returns the authors of the paper with the given ID as an array holding one
  # string per 'entry' element (the text of its 'content name' elements).
  def self.get_authors(id)
    doc = Nokogiri::HTML.parse(get_paper_author(id))
    # TODO: fetch the author IDs as well?
    doc.css('entry').map { |entry| entry.css('content name').text }
  end

  # Returns the URL of the paper with the given ID.
  # Returns '' when the response contains no 'url' element.
  def self.get_url(id)
    first_text(get_paper_url(id), 'url')
  end

  # Returns the IDs of the papers cited by the paper with the given ID
  # (one string per 'entry' element, taken from 'content dstid').
  def self.get_citations(id)
    doc = Nokogiri::HTML.parse(get_paper_ref(src_id: id))
    doc.css('entry').map { |entry| entry.css('content dstid').text }
  end

  # Returns the IDs of the papers that cite the paper with the given ID
  # (one string per 'entry' element, taken from 'content srcid').
  def self.get_citedbyes(id)
    doc = Nokogiri::HTML.parse(get_paper_ref(dst_id: id))
    doc.css('entry').map { |entry| entry.css('content srcid').text }
  end

  # The methods below are helpers.

  # Fetches the raw response for the given paper ID from the Paper schema.
  def self.get_paper_info(id)
    fetch_xml('Paper', "ID%20eq%20#{id}")
  end

  # Fetches the raw response for the given paper ID from the Paper_Author schema.
  def self.get_paper_author(id)
    fetch_xml('Paper_Author', "PaperID%20eq%20#{id}")
  end

  # Fetches the raw response for the given paper ID from the Paper_Url schema.
  def self.get_paper_url(id)
    fetch_xml('Paper_Url', "PaperID%20eq%20#{id}")
  end

  # Fetches the raw response from the Paper_Ref schema.
  #
  # src_id: ID of the citing paper (filters on SrcID); ignored when blank.
  # dst_id: ID of the cited paper (filters on DstID); ignored when blank.
  #
  # When both IDs are given the two conditions are combined with "and".
  # When neither is given no request is made and '' is returned.
  def self.get_paper_ref(src_id: '', dst_id: '')
    # to_s lets callers pass Integer (or nil) IDs, as the other helpers allow.
    conditions = []
    conditions << "SrcID%20eq%20#{src_id}" unless src_id.to_s.empty?
    conditions << "DstID%20eq%20#{dst_id}" unless dst_id.to_s.empty?
    return '' if conditions.empty?

    fetch_xml('Paper_Ref', conditions.join('%20and%20'))
  end

  # Builds the request URL for +schema+ with the already percent-encoded
  # +filter+ expression and returns the response body as a string.
  def self.fetch_xml(schema, filter)
    url = "#{API_BASE_URL}#{schema}?$filter=#{filter}"
    # URI.open is required here: since Ruby 3.0 Kernel#open no longer opens
    # URLs. The block form closes the underlying IO once it has been read.
    URI.open(url, &:read)
  end

  # Parses +xml+ and returns the text of the first element matching the CSS
  # +selector+, or '' when nothing matches.
  def self.first_text(xml, selector)
    node = Nokogiri::HTML.parse(xml).at_css(selector)
    node ? node.text : ''
  end

  private_class_method :fetch_xml, :first_text
end
# Demo run, executed only when this file is invoked directly as a script.
# Each lookup is called for sample paper '39482' and its result is printed.
# Results noted by the original author:
#   get_title     -> "Topic-sensitive PageRank"
#   get_year      -> "2002"
#   get_authors   -> ["Taher H. Haveliwala"]
#   get_url       -> "http://doi.acm.org/10.1145/511446.511513"
#   get_citations -> ["59114", "84130", ..., "4451162"]
#   get_citedbyes -> ["17258", "18580", ..., "1890867"]
if $PROGRAM_NAME == __FILE__
  sample_paper_id = '39482'
  lookups = %i[get_title get_year get_authors get_url get_citations get_citedbyes]
  lookups.each do |lookup|
    p MsacademicApiWrapper.public_send(lookup, sample_paper_id)
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment