Skip to content

Instantly share code, notes, and snippets.

@osanseviero
Last active August 24, 2021 05:58
Show Gist options
  • Save osanseviero/467e57906b4598403af6cee870804f29 to your computer and use it in GitHub Desktop.
Save osanseviero/467e57906b4598403af6cee870804f29 to your computer and use it in GitHub Desktop.
Upload CoreNLP models to the Hub
import os
import shutil
from huggingface_hub import Repository, HfApi, HfFolder
def get_model_card(lang):
    """Return the README.md (model card) text for a CoreNLP model.

    The card is a YAML metadata header followed by a short Markdown
    description. ``lang`` is inserted verbatim into both the ``language:``
    list of the header and the Markdown title.

    Args:
        lang: Language identifier to advertise in the card (e.g. ``"en"``).

    Returns:
        The complete card as a single string ending in a newline.
    """
    # NOTE(review): `license: GNU` and `library_tag` may not be the exact
    # key/value spellings the Hub's metadata schema expects -- confirm against
    # the model card documentation before relying on them for filtering.
    metadata = (
        "---\n"
        "tags:\n"
        "- corenlp\n"
        "library_tag: corenlp\n"
        "language:\n"
        f"- {lang}\n"
        "license: GNU\n"
        "---\n"
    )
    description = (
        f"# Core NLP model for {lang}\n"
        "CoreNLP is your one stop shop for natural language processing in Java! CoreNLP enables users to derive linguistic annotations for text, including token and sentence boundaries, parts of speech, named entities, numeric and time values, dependency and constituency parses, coreference, sentiment, quote attributions, and relations.\n"
        "Find more about it in [our website](https://stanfordnlp.github.io/CoreNLP) and our [GitHub repository](https://github.com/stanfordnlp/CoreNLP).\n"
    )
    return metadata + description
# Names of the CoreNLP model bundles to upload. Each entry is used both as the
# suffix of the local ``stanford-corenlp-models-<name>.jar`` file and as the
# suffix of the ``corenlp_<name>`` repository name.
MODELS = [
    "arabic",
    "chinese",
    "english-default", "english-extra", "english-kbp",
    "french",
    "german",
    "spanish",
]
# ISO 639-1 codes for the language part of each CoreNLP model name. Taking the
# first two letters of the English name is only correct by coincidence (it
# yields "ch", "ge" and "sp" for Chinese, German and Spanish).
_LANGUAGE_CODES = {
    "arabic": "ar",
    "chinese": "zh",
    "english": "en",
    "french": "fr",
    "german": "de",
    "spanish": "es",
}


def _language_code(model):
    """Return the ISO 639-1 code for a model name such as ``"english-kbp"``.

    Raises:
        ValueError: If the language part of ``model`` has no known code.
    """
    language = model.split("-")[0]  # "english-kbp" -> "english"
    try:
        return _LANGUAGE_CODES[language]
    except KeyError:
        raise ValueError(
            f"No ISO 639-1 code known for language {language!r} "
            f"(model {model!r})"
        ) from None


def push_to_hub(organization=None):
    """Upload every CoreNLP model jar listed in ``MODELS`` to the Hub.

    For each model this creates (or reuses) a ``corenlp_<model>`` repository,
    clones it under ``hub/``, copies ``stanford-corenlp-models-<model>.jar``
    from the current working directory into the clone, writes a ``README.md``
    model card and pushes the result.

    Args:
        organization: Passed through to ``create_repo`` as ``organization``.
            Defaults to ``None``, which is what the script used before this
            became a parameter.

    Raises:
        ValueError: If a model's language has no known ISO 639-1 code. This is
            checked before any repository is created for that model.
    """
    api = HfApi()
    token = HfFolder.get_token()  # same for every model, so look it up once

    for model in MODELS:
        # Resolve the language first so a bad model name fails before any
        # repository is created or cloned for it.
        lang = _language_code(model)

        # Create the repository
        repo_name = "corenlp_" + model
        repo_url = api.create_repo(
            name=repo_name,
            token=token,
            organization=organization,
            exist_ok=True,
        )

        # Clone the repository
        repo_local_path = os.path.join("hub", repo_name)
        repo = Repository(repo_local_path, clone_from=repo_url)
        repo.git_pull(rebase=True)

        # Make sure jar files are tracked with LFS
        repo.lfs_track(["*.jar"])

        # Create a copy of the jar file in the repository
        src = f"stanford-corenlp-models-{model}.jar"
        dst = os.path.join(repo_local_path, src)
        shutil.copy(src, dst)

        # Create the model card
        readme_path = os.path.join(repo_local_path, "README.md")
        with open(readme_path, "w", encoding="utf-8") as f:
            f.write(get_model_card(lang))

        # Push the model
        print("Pushing files to the Hub")
        repo.push_to_hub(commit_message="Add model")
        print(f"View your model in {repo_url}")
# Only upload when executed as a script, never as a side effect of an import.
if __name__ == '__main__':
    push_to_hub()
@julien-c
Copy link

url is the same as ‘repo_url’ no?

@osanseviero
Copy link
Author

url is the same as ‘repo_url’ no?

Yes, you're right!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment