Last active
September 13, 2023 16:48
-
-
Save rochacbruno/528074ebe6dddde2f848200717eece10 to your computer and use it in GitHub Desktop.
galaxy_scores
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Dump the scores of each collection and repository to a local scores/ folder.
""" | |
import json | |
import sys | |
from pathlib import Path | |
from statistics import mean | |
from typing import Dict, Optional, Union | |
import django | |
from django.conf import settings | |
from django.db.models import QuerySet | |
django.setup() # noqa | |
from galaxy.main.models.collection import Collection | |
from galaxy.main.models.repository import Repository, RepositorySurvey | |
# Root directory (relative to the current working directory) that receives
# every JSON file written by Score.save.
OUTPUT_FOLDER = "scores"

# Names of the survey attributes that Score.new averages for each item.
# NOTE: the mixed-case spelling ("FIElDS") is kept on purpose because
# Score.new looks the constant up under exactly this name.
SURVEY_FIElDS = (
    "docs", "ease_of_use", "does_what_it_says", "works_as_is", "used_in_production",
)
class Score:
    """Score data for a single collection or role, ready to be dumped as JSON.

    Instances are plain attribute bags: every keyword passed to the
    constructor becomes an instance attribute, and ``as_dict`` serializes
    those attributes in the order in which they were supplied.
    """

    namespace: str
    name: str
    kind: str
    score: Optional[float]
    count: int
    quality_score: Optional[float]
    # Per-category survey means; None when a category received no votes.
    docs: Optional[float]
    ease_of_use: Optional[float]
    does_what_it_says: Optional[float]
    works_as_is: Optional[float]
    used_in_production: Optional[float]

    def __init__(self, **kwargs):
        """Store every keyword argument as an attribute of the same name."""
        for key, value in kwargs.items():
            setattr(self, key, value)

    def as_dict(self, exclude=("kind", "namespace", "name")) -> Dict:
        """Return the instance attributes as a dict, minus the *exclude* keys.

        The default exclusions are the attributes that ``save`` already
        encodes in the output path. Pass ``None`` (or any empty value) to
        keep every attribute.
        """
        exclude = exclude or ()
        return {k: v for k, v in vars(self).items() if k not in exclude}

    def as_json(self) -> str:
        """Return ``as_dict()`` (default exclusions) encoded as a JSON string."""
        return json.dumps(self.as_dict())

    @classmethod
    def _get_mean(cls, qs: "QuerySet", key: str) -> Optional[float]:
        """Return the mean of attribute *key* over *qs*, rounded to 1 decimal.

        Items whose value is ``0`` or ``None`` are ignored. Returns ``None``
        when no item carries a usable value.
        """
        # consider only the categories votes to calculate the average
        # see https://galaxy.ansible.com/docs/contributing/content_scoring.html
        votes = [
            value
            for value in (getattr(item, key) for item in qs)
            if value not in (0, None)
        ]
        if not votes:
            return None
        return round(mean(votes), 1)

    @classmethod
    def _calculate_score(
        cls, obj: "Union[Repository, Collection]"
    ) -> Optional[float]:
        """Combine quality and community scores into one value (1 decimal).

        When both scores are present they are blended: the community score
        weighs ``n / 6`` and the quality score ``(6 - n) / 6``, where ``n`` is
        the survey count capped at 3. Otherwise whichever score is available
        is used. Returns ``None`` when there is no score at all.
        """
        # see https://github.com/ansible/galaxy/blob/6a374cacdf0f04de94486913bba5285e24e178d3/galaxyui/src/app/utilities/score/score.component.ts#L33
        quality_score = getattr(obj, "quality_score", None)
        community_score = obj.community_score
        if quality_score is not None and community_score is not None:
            # At most 3 surveys influence the weighting; a missing (None)
            # count is treated as 0 instead of raising a TypeError.
            survey_count = min(obj.community_survey_count or 0, 3)
            score = quality_score * ((6 - survey_count) / 6) + community_score * (
                survey_count / 6
            )
        else:
            # NOTE: `or` means a community score of 0 is treated as missing
            # and falls back to the quality score.
            score = community_score or quality_score
        if score is None:
            return None
        return round(score * 10) / 10

    @classmethod
    def new(
        cls,
        obj: "Union[Repository, Collection]",
        kind: str,
        namespace: str,
        surveys: "QuerySet",
        quality_score: Optional[float] = None,
    ) -> "Score":
        """Build a Score for *obj* from its survey rows.

        *quality_score* is rounded to one decimal when given. One attribute
        per entry of ``SURVEY_FIElDS`` is filled with the mean of that field
        over *surveys*.
        """
        return cls(
            kind=kind,
            namespace=namespace,
            quality_score=round(quality_score, 1)
            if quality_score is not None
            else quality_score,
            name=obj.name,
            score=cls._calculate_score(obj),
            count=obj.community_survey_count,
            **{field: cls._get_mean(surveys, field) for field in SURVEY_FIElDS},
        )

    @classmethod
    def from_collection(cls, collection: "Collection") -> "Score":
        """Build a ``kind="collection"`` Score from a Collection and its surveys."""
        return cls.new(
            collection,
            kind="collection",
            namespace=collection.namespace.name,
            surveys=collection.collectionsurvey_set.all(),
        )

    @classmethod
    def from_repository(cls, repository: "Repository") -> "Score":
        """Build a ``kind="role"`` Score from a Repository and its surveys."""
        return cls.new(
            repository,
            kind="role",
            namespace=repository.github_user,
            quality_score=repository.quality_score,
            surveys=RepositorySurvey.objects.filter(repository=repository),
        )

    @staticmethod
    def _load_json_object(path: Path) -> Dict:
        """Return the JSON object stored at *path*, or ``{}`` if it is absent."""
        if path.exists():
            return json.loads(path.read_text())
        return {}

    def save(self, output_folder: Optional[Union[str, Path]] = None) -> None:
        """Write this score into the three JSON views under the output folder.

        ``<root>/<kind>/<namespace>/<name>.json`` holds this score alone,
        ``<root>/<kind>/<namespace>.json`` maps name -> score and
        ``<root>/<kind>.json`` maps namespace -> name -> score. The two
        aggregate files are read, updated and rewritten on every call.

        *output_folder* overrides the root directory; when omitted the
        module-level ``OUTPUT_FOLDER`` is used.
        """
        root = Path(OUTPUT_FOLDER if output_folder is None else output_folder)
        kind_folder = root / self.kind
        payload = self.as_dict()

        # dump individual /kind/namespace/name.json
        item_folder = kind_folder / self.namespace
        item_folder.mkdir(parents=True, exist_ok=True)
        (item_folder / (self.name + ".json")).write_text(self.as_json())

        # dump to /kind/namespace.json
        namespace_file_path = kind_folder / (self.namespace + ".json")
        namespace_data = self._load_json_object(namespace_file_path)
        namespace_data[self.name] = payload
        namespace_file_path.write_text(json.dumps(namespace_data))

        # dump to kind.json
        kind_file_path = root / (self.kind + ".json")
        kind_data = self._load_json_object(kind_file_path)
        kind_data.setdefault(self.namespace, {})[self.name] = payload
        kind_file_path.write_text(json.dumps(kind_data))
def progressbar(it, prefix="", size=60, out=sys.stdout):  # Python3.6+
    """Yield the items of *it* while drawing a text progress bar on *out*.

    Args:
        it: Sized iterable (``len(it)`` is called once up front).
        prefix: Text printed before the bar.
        size: Width of the bar in characters.
        out: Writable text stream that receives the bar.

    The bar is redrawn in place (carriage return) before the first item and
    after each item; a final newline is printed once *it* is exhausted.
    """
    count = len(it)

    def show(j):
        # An empty iterable has no meaningful ratio: draw an empty bar
        # instead of dividing by zero.
        x = int(size * j / count) if count else 0
        print(
            f"{prefix}[{'█' * x}{'.' * (size - x)}] {j}/{count}",
            end="\r",
            file=out,
            flush=True,
        )

    show(0)
    for done, item in enumerate(it, start=1):
        yield item
        show(done)
    print("\n", flush=True, file=out)
def main():
    """Dump a Score for every collection and repository that has surveys.

    Both querysets keep only rows with ``community_survey_count > 0`` and are
    ordered by that count, highest first. Collections are processed before
    repositories, and a summary line is printed at the end.
    """
    has_surveys = {"community_survey_count__gt": 0}
    most_surveyed_first = "-community_survey_count"

    collections = Collection.objects.filter(**has_surveys).order_by(
        most_surveyed_first
    )
    repositories = Repository.objects.filter(**has_surveys).order_by(
        most_surveyed_first
    )

    # (items, progress-bar label, Score factory) in processing order
    jobs = (
        (collections, "Collections: ", Score.from_collection),
        (repositories, "Repositories: ", Score.from_repository),
    )
    for items, label, build_score in jobs:
        for item in progressbar(items, label):
            build_score(item).save()

    print(
        f"Dumped {len(collections)} collections and {len(repositories)} roles to {OUTPUT_FOLDER}/"
    )


# Script entry point: runs as soon as the module is executed (no __main__ guard).
main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment