Skip to content

Instantly share code, notes, and snippets.

@myano
Last active September 21, 2018 21:31
Show Gist options
  • Save myano/88dfdb4fea27bb632293e107a663998a to your computer and use it in GitHub Desktop.
Save myano/88dfdb4fea27bb632293e107a663998a to your computer and use it in GitHub Desktop.
A new Megalodon.jp handler for the archivenow project, along with the corresponding modifications to its web interface (index.html).
--- index.html-orig 2018-09-21 17:27:49.048418655 -0400
+++ index.html 2018-09-21 15:30:11.071417494 -0400
@@ -60,7 +60,7 @@
}
#text_url{
- width:333px;
+ width:800px;
font-size: 12.5px;
}
@@ -130,6 +130,11 @@
opacity: 0;
}
+.img5 {
+ width: 13px;
+ opacity: 0;
+}
+
#apilink{
font-size: smaller;
padding-top: 39px;
@@ -143,7 +148,7 @@
<label for="text_url" id="label_url">URL</label>
<input type="text" id="text_url" required>
</div>
-
+
<div>
<p id="select_label">Select archives:</p>
<div id="choices">
@@ -151,6 +156,7 @@
<input type="checkbox" id="choice-archive1" checked > Internet Archive <img src={{ url_for('static', filename = "ajax-loader.gif") }} class="img1" id="img1"> <br>
<input type="checkbox" id="choice-archive2" checked > Archive.is <img src={{ url_for('static', filename = "ajax-loader.gif") }} class="img2" id="img2"> <br>
<input type="checkbox" id="choice-archive3" checked > WebCite <img src={{ url_for('static', filename = "ajax-loader.gif") }} class="img3" id="img3"> <br>
+ <input type="checkbox" id="choice-archive5" checked > Megalodon.jp <img src={{ url_for('static', filename = "ajax-loader.gif") }} class="img5" id="img5"> <br>
<input type="checkbox" id="choice-archive4" > Perma.cc <img src={{ url_for('static', filename = "ajax-loader.gif") }} class="img4" id="img4">
<div class="reveal-if-active">
<label for="perma_cc_api">Permaa.cc requires <a href="https://perma.cc/settings/tools" target="_blank"> an API Key </a></label>
@@ -206,11 +212,18 @@
document.getElementById('choice-archive4').checked = false
}
}
+ if (localStorage.getItem("check_archive_5") !== null){
+ if (localStorage.getItem("check_archive_5") == 'true'){
+ document.getElementById('choice-archive5').checked = true
+ }else{
+ document.getElementById('choice-archive5').checked = false
+ }
+ }
function reset() {
window.location.reload();
-
+
}
function push_archive() {
@@ -219,7 +232,8 @@
localStorage.setItem("check_archive_1", false);
localStorage.setItem("check_archive_2", false);
localStorage.setItem("check_archive_3", false);
- localStorage.setItem("check_archive_4", false);
+ localStorage.setItem("check_archive_4", false);
+ localStorage.setItem("check_archive_5", false);
var arr = []
@@ -232,6 +246,9 @@
if(table.rows[r].cells[0].innerHTML.indexOf("http://archive.is") !== -1){
arr.push("is");
}
+ if(table.rows[r].cells[0].innerHTML.indexOf("http://megalodon.jp") !== -1){
+ arr.push("mg");
+ }
if(table.rows[r].cells[0].innerHTML.indexOf("http://www.webcitation.org") !== -1){
arr.push("wc");
}
@@ -241,7 +258,8 @@
}
function validateURL(textval) {
- var urlregex = /^(https?|ftp):\/\/(((([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(%[\da-f]{2})|[!\$&'\(\)\*\+,;=]|:)*@)?(((\d|[1-9]\d|1\d\d|2[0-4]\d|25[0-5])\.(\d|[1-9]\d|1\d\d|2[0-4]\d|25[0-5])\.(\d|[1-9]\d|1\d\d|2[0-4]\d|25[0-5])\.(\d|[1-9]\d|1\d\d|2[0-4]\d|25[0-5]))|((([a-z]|\d|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(([a-z]|\d|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])*([a-z]|\d|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])))\.)+(([a-z]|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(([a-z]|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])*([a-z]|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])))\.?)(:\d*)?)(\/((([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(%[\da-f]{2})|[!\$&'\(\)\*\+,;=]|:|@)+(\/(([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(%[\da-f]{2})|[!\$&'\(\)\*\+,;=]|:|@)*)*)?)?(\?((([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(%[\da-f]{2})|[!\$&'\(\)\*\+,;=]|:|@)|[\uE000-\uF8FF]|\/|\?)*)?(\#((([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(%[\da-f]{2})|[!\$&'\(\)\*\+,;=]|:|@)|\/|\?)*)?$/i;
+ /* var urlregex = /^(https?|ftp):\/\/(((([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(%[\da-f]{2})|[!\$&'\(\)\*\+,;=]|:)*@)?(((\d|[1-9]\d|1\d\d|2[0-4]\d|25[0-5])\.(\d|[1-9]\d|1\d\d|2[0-4]\d|25[0-5])\.(\d|[1-9]\d|1\d\d|2[0-4]\d|25[0-5])\.(\d|[1-9]\d|1\d\d|2[0-4]\d|25[0-5]))|((([a-z]|\d|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(([a-z]|\d|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])*([a-z]|\d|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])))\.)+(([a-z]|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(([a-z]|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])*([a-z]|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])))\.?)(:\d*)?)(\/((([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(%[\da-f]{2})|[!\$&'\(\)\*\+,;=]|:|@)+(\/(([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(%[\da-f]{2})|[!\$&'\(\)\*\+,;=]|:|@)*)*)?)?(\?((([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(%[\da-f]{2})|[!\$&'\(\)\*\+,;=]|:|@)|[\uE000-\uF8FF]|\/|\?)*)?(\#((([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(%[\da-f]{2})|[!\$&'\(\)\*\+,;=]|:|@)|\/|\?)*)?$/i; */
+ var urlregex = /^https?:\S+/i;
return urlregex.test(textval);
}
if (validateURL(document.getElementById('text_url').value) == false){
@@ -261,7 +279,7 @@
var selected_archives = 0;
if (document.getElementById('choice-archive1').checked == true){
-
+
selected_archives = selected_archives + 1;
if(arr.indexOf("ia") == -1){
@@ -289,8 +307,8 @@
localStorage.setItem("check_archive_1", true);
}
if (document.getElementById('choice-archive2').checked == true){
-
- selected_archives = selected_archives + 1;
+
+ selected_archives = selected_archives + 1;
if(arr.indexOf("is") == -1){
document.getElementById('img2').style.opacity = 1
@@ -317,9 +335,9 @@
localStorage.setItem("check_archive_2", true);
}
if (document.getElementById('choice-archive3').checked == true){
-
+
selected_archives = selected_archives + 1;
-
+
if(arr.indexOf("wc") == -1){
document.getElementById('img3').style.opacity = 1
$.ajax({
@@ -374,6 +392,35 @@
localStorage.setItem("check_archive_4", true);
}
+ if (document.getElementById('choice-archive5').checked == true){
+
+ selected_archives = selected_archives + 1;
+
+ if(arr.indexOf("mg") == -1){
+ document.getElementById('img5').style.opacity = 1
+ $.ajax({
+ type: "GET",
+ url: "mg/"+document.getElementById('text_url').value,
+ success: function(json) {
+ if (validateURL(json['results'][0]) == true){
+ var table=document.getElementById("results");
+ var row=table.insertRow(-1);
+ var cell1=row.insertCell(0);
+ var cell2=row.insertCell(1);
+ cell1.innerHTML='<a href="http://megalodon.jp" target="_blank"> Megalodon.jp </a>'
+ cell2.innerHTML='<a href="'+json['results'][0]+'" target="_blank"> '+json['results'][0]+' </a>'
+ document.getElementById('results').style.opacity = 1
+ document.getElementById('img5').style.opacity = 0
+ }
+ },
+ complete: function(){
+ document.getElementById('img5').style.opacity = 0
+ }
+ });
+ }
+ localStorage.setItem("check_archive_5", true);
+ }
+
if (selected_archives == 0){
document.getElementById('errors').innerHTML="*Select at least one archive*";
return;
@@ -381,4 +428,5 @@
}
</script>
</body>
-</html>
\ No newline at end of file
+</html>
+
import os
import requests
# User-Agent string sent with the requests made by MG_handler.push
# (identifies as desktop Firefox 61 on 64-bit Linux).
new_header = 'Mozilla/5.0 (X11; Linux x86_64; rv:61.0) Gecko/20100101 Firefox/61.0'
class MG_handler(object):
    """Handler that submits a URI to Megalodon.jp for archiving.

    Attributes:
        enabled: Always set to True in ``__init__``.
        name: Display name of the archive; also used in error messages.
        api_required: Always False; ``push`` sends no API key.
    """

    def __init__(self):
        self.enabled = True
        self.name = 'Megalodon.jp'
        self.api_required = False

    @staticmethod
    def _extract(text, start, end, what):
        """Return the part of ``text`` between ``start`` and the next ``end``.

        Raises:
            ValueError: if ``start`` does not occur in ``text``. ``what`` is a
                short human-readable label used in the message.
        """
        _, found, tail = text.partition(start)
        if not found:
            raise ValueError(
                'could not find {0} in the response'.format(what))
        # If ``end`` is missing, the whole remainder is returned.
        return tail.partition(end)[0]

    def push(self, uri_org, p_args=[]):
        """Submit ``uri_org`` to Megalodon.jp and return the result.

        The submission takes two requests: a GET of the form page to obtain
        the hidden ``token`` field and the ``PHPSESSID`` session cookie, then
        a POST of the URL together with that token and cookie.

        Args:
            uri_org: The URI to archive.
            p_args: Accepted for interface compatibility; not read or
                modified here.

        Returns:
            On success, the string found after ``location.href = "`` in the
            POST response (presumably the archived page's URL - confirm
            against the live service). On any failure, a string of the form
            ``"Error (Megalodon.jp): <reason>"``. This method does not raise.
        """
        headers = {
            'User-Agent': new_header,
        }
        # NOTE(review): no timeout is set, so a stalled server blocks the
        # caller indefinitely; consider passing ``timeout=`` to both calls.
        try:
            # Let requests build the query string so that '&', '#', etc.
            # inside uri_org are percent-encoded instead of truncating it.
            r = requests.get('http://megalodon.jp/',
                             params={'url': uri_org}, headers=headers)
            token = self._extract(r.text, '"token" value="', '"',
                                  'the form token')
            session_id = r.cookies.get('PHPSESSID')
            if session_id is None:
                raise ValueError('no PHPSESSID cookie in the response')

            r2 = requests.post('http://megalodon.jp/pc/get_simple/decide',
                               data={"url": uri_org, "token": token},
                               cookies={'PHPSESSID': session_id},
                               headers=headers)
            return self._extract(r2.text, 'location.href = "', '"',
                                 'the archive location')
        except Exception as e:
            # Best-effort contract: report the first failure as a string.
            # Stopping here keeps the real cause from being replaced by a
            # follow-on "unbound variable" error in a later step.
            return "Error ({0}): {1}".format(self.name, str(e))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment