import datasets
import numpy as np
from embeddings.evaluator.evaluation_results import Predictions
from embeddings.evaluator.leaderboard import get_dataset_task
from embeddings.evaluator.submission import AveragedSubmission
from embeddings.utils.utils import get_installed_packages
It is important to note that we not only make it easy to train models, but also provide many helpers for creating a submission to the leaderboard.
We start with a couple of names.
# Hugging Face Hub identifier of the dataset used throughout this example.
DATASET_NAME = "clarin-pl/polemo2-official"
# Name of the dataset column that holds the gold labels.
TARGET_COLUMN_NAME = "target"
We also want to gather all hyperparameters for each submission. Here we collect only a few parameters, for presentation purposes.
# Placeholder hyperparameters; a real submission would list the values
# actually used to train the model.
hparams = {"hparam_name_1": 0.2, "hparam_name_2": 0.1}
We do the same with Python packages, using one of the helper methods.
# Collect the installed packages as "name==version" strings so that the
# environment is recorded alongside the submission.
packages = get_installed_packages()
# Notebook display: preview the first ten entries.
packages[:10]
['absl-py==1.4.0',
'aiofiles==22.1.0',
'aiohttp==3.8.4',
'aiosignal==1.3.1',
'aiosqlite==0.18.0',
'alembic==1.9.3',
'anyio==3.6.2',
'appdirs==1.4.4',
'argon2-cffi-bindings==21.2.0',
'argon2-cffi==21.3.0']
The next step is related to datasets and predictions.
# Load every split of the dataset; no config name is passed, so the
# library falls back to the dataset's default configuration.
dataset = datasets.load_dataset(DATASET_NAME)
# Notebook display: show the available splits and their sizes.
dataset
No config specified, defaulting to: polemo2-official/all_text
Found cached dataset polemo2-official (/root/.cache/huggingface/datasets/clarin-pl___polemo2-official/all_text/0.0.0/2b75fdbe5def97538e81fb120f8752744b50729a4ce09bd75132bfc863a2fd70)
100%|██████████| 3/3 [00:00<00:00, 828.48it/s]
DatasetDict({
train: Dataset({
features: ['text', 'target'],
num_rows: 6573
})
validation: Dataset({
features: ['text', 'target'],
num_rows: 823
})
test: Dataset({
features: ['text', 'target'],
num_rows: 820
})
})
# Gold labels of the test split as a NumPy array.
y_true = np.array(dataset["test"][TARGET_COLUMN_NAME])
# Notebook display: preview the first ten labels.
y_true[:10]
array([1, 2, 2, 2, 2, 0, 0, 0, 1, 3])
Importantly, we do not want to store just a single prediction for each object: we want to calculate standard deviations for each object, and hence we need more than one prediction.
# Build five prediction sets against the same gold labels. The predicted
# labels are drawn uniformly at random from {0, 1, 2, 3} and merely stand
# in for the outputs of five real model runs.
predictions = [
    Predictions(
        y_true=y_true,
        y_pred=np.random.randint(low=0, high=4, size=len(y_true)),
    )
    for _ in range(5)
]
Finally, we can create a submission that gathers all of this information.
# Bundle the names, prediction sets, hyperparameters, package list and
# leaderboard task into a single submission object.
submission = AveragedSubmission.from_predictions(
    submission_name="my-great-submission",  # put your submission here!
    dataset_name=DATASET_NAME,
    # Record the version string of the loaded dataset.
    dataset_version=dataset["train"].info.version.version_str,
    embedding_name="my-great-model",  # put your embedding name here!
    predictions=predictions,
    hparams=hparams,
    packages=packages,
    # Look up the leaderboard task associated with this dataset.
    task=get_dataset_task(DATASET_NAME),
)
We can even save our submission.
# Write the submission as JSON into the "my-great-submission" directory,
# without compression.
submission.save_json(
    root="my-great-submission",
    filename="my-great-model.json",
    compress=False,
)
!ls my-great-submission
my-great-model.json my-great-submission_predictions.json
!cat my-great-submission/my-great-model.json
{
"submission_name": "my-great-submission",
"dataset_name": "clarin-pl/polemo2-official",
"dataset_version": "0.0.0",
"embedding_name": "my-great-model",
"hparams": {
"hparam_name_1": 0.2,
"hparam_name_2": 0.1
},
"packages": [
"absl-py==1.4.0",
"aiofiles==22.1.0",
"aiohttp==3.8.4",
"aiosignal==1.3.1",
"aiosqlite==0.18.0",
"alembic==1.9.3",
"anyio==3.6.2",
"appdirs==1.4.4",
"argon2-cffi-bindings==21.2.0",
"argon2-cffi==21.3.0",
"arrow==1.2.3",
"asttokens==2.2.1",
"astunparse==1.6.3",
"async-timeout==4.0.2",
"attrs==22.2.0",
"babel==2.11.0",
"backcall==0.2.0",
"beautifulsoup4==4.11.2",
"black==21.12b0",
"bleach==6.0.0",
"cachetools==5.3.0",
"catalogue==2.0.8",
"certifi==2022.12.7",
"cffi==1.15.1",
"charset-normalizer==3.0.1",
"click==8.0.4",
"cmaes==0.9.1",
"colorlog==6.7.0",
"comm==0.1.2",
"contourpy==1.0.7",
"coverage==6.2",
"cycler==0.11.0",
"datasets==2.9.0",
"debugpy==1.6.6",
"decorator==5.1.1",
"defusedxml==0.7.1",
"dill==0.3.6",
"docker-pycreds==0.4.0",
"execnb==0.1.5",
"executing==1.2.0",
"fastcore==1.5.28",
"fastjsonschema==2.16.2",
"filelock==3.9.0",
"fonttools==4.38.0",
"fqdn==1.5.1",
"frozenlist==1.3.3",
"fsspec==2023.1.0",
"future==0.18.3",
"ghapi==1.0.3",
"gitdb==4.0.10",
"gitpython==3.1.30",
"google-auth-oauthlib==0.4.6",
"google-auth==2.16.0",
"greenlet==2.0.2",
"grpcio==1.51.1",
"huggingface-hub==0.12.0",
"idna==3.4",
"importlib-metadata==6.0.0",
"iniconfig==2.0.0",
"ipykernel==6.21.2",
"ipython-genutils==0.2.0",
"ipython==8.10.0",
"isoduration==20.11.0",
"isort==5.10.1",
"jedi==0.18.2",
"jinja2==3.1.2",
"joblib==1.2.0",
"json5==0.9.11",
"jsonpointer==2.3",
"jsonschema==4.17.3",
"jupyter-client==8.0.2",
"jupyter-core==5.2.0",
"jupyter-events==0.5.0",
"jupyter-server-fileid==0.6.0",
"jupyter-server-terminals==0.4.4",
"jupyter-server-ydoc==0.6.1",
"jupyter-server==2.2.1",
"jupyter-ydoc==0.2.2",
"jupyterlab-pygments==0.2.2",
"jupyterlab-server==2.19.0",
"jupyterlab==3.6.1",
"kiwisolver==1.4.4",
"mako==1.2.4",
"markdown==3.4.1",
"markupsafe==2.1.2",
"matplotlib-inline==0.1.6",
"matplotlib==3.6.3",
"mistune==2.0.5",
"multidict==6.0.4",
"multiprocess==0.70.14",
"mypy-extensions==1.0.0",
"mypy==0.991",
"nbclassic==0.5.1",
"nbclient==0.7.2",
"nbconvert==7.2.9",
"nbdev==2.3.11",
"nbformat==5.7.3",
"nest-asyncio==1.5.6",
"notebook-shim==0.2.2",
"notebook==6.5.2",
"numpy==1.23.4",
"oauthlib==3.2.2",
"optuna==3.1.0",
"packaging==23.0",
"pandas==1.5.3",
"pandocfilters==1.5.0",
"parso==0.8.3",
"pastel==0.2.1",
"pathspec==0.11.0",
"pathtools==0.1.2",
"pexpect==4.8.0",
"pickleshare==0.7.5",
"pillow==9.4.0",
"pip==23.0",
"platformdirs==3.0.0",
"pluggy==1.0.0",
"poethepoet==0.11.0",
"prometheus-client==0.16.0",
"prompt-toolkit==3.0.36",
"protobuf==4.21.12",
"psutil==5.9.4",
"ptyprocess==0.7.0",
"pure-eval==0.2.2",
"py==1.11.0",
"pyarrow==11.0.0",
"pyasn1-modules==0.2.8",
"pyasn1==0.4.8",
"pycparser==2.21",
"pydantic==1.10.4",
"pydeprecate==0.3.1",
"pyflakes==2.4.0",
"pygments==2.14.0",
"pyparsing==3.0.9",
"pyrsistent==0.19.3",
"pytest-env==0.6.2",
"pytest==6.2.5",
"python-dateutil==2.8.2",
"python-json-logger==2.0.5",
"pytorch-lightning==1.5.4",
"pytz==2022.7.1",
"pyyaml==6.0",
"pyzmq==25.0.0",
"regex==2022.10.31",
"requests-oauthlib==1.3.1",
"requests==2.28.2",
"responses==0.18.0",
"rfc3339-validator==0.1.4",
"rfc3986-validator==0.1.1",
"rsa==4.9",
"sacremoses==0.0.53",
"scikit-learn==1.2.1",
"scipy==1.9.3",
"seaborn==0.12.2",
"send2trash==1.8.0",
"sentry-sdk==1.15.0",
"seqeval==1.2.2",
"setproctitle==1.3.2",
"setuptools==65.7.0",
"six==1.16.0",
"smmap==5.0.0",
"sniffio==1.3.0",
"soupsieve==2.3.2.post1",
"sqlalchemy==2.0.3",
"srsly==2.4.5",
"stack-data==0.6.2",
"tensorboard-data-server==0.7.0",
"tensorboard-plugin-wit==1.8.1",
"tensorboard==2.12.0",
"terminado==0.17.1",
"threadpoolctl==3.1.0",
"tinycss2==1.2.1",
"tokenizers==0.13.2",
"toml==0.10.2",
"tomli==1.2.3",
"torch==1.12.1+cu113",
"torchaudio==0.12.1+cu113",
"torchmetrics==0.11.1",
"torchvision==0.13.1+cu113",
"tornado==6.2",
"tqdm==4.64.1",
"traitlets==5.9.0",
"transformers==4.26.1",
"typer==0.7.0",
"types-docutils==0.19.1.3",
"types-pyyaml==6.0.12.6",
"types-requests==2.26.1",
"types-setuptools==67.2.0.1",
"typing-extensions==4.4.0",
"uri-template==1.2.0",
"urllib3==1.26.14",
"wandb==0.13.10",
"watchdog==2.2.1",
"wcwidth==0.2.6",
"webcolors==1.12",
"webencodings==0.5.1",
"websocket-client==1.5.1",
"werkzeug==2.2.2",
"wheel==0.38.4",
"xgboost==1.7.3",
"xxhash==3.2.0",
"y-py==0.5.5",
"yarl==1.8.2",
"ypy-websocket==0.8.2",
"zipp==3.13.0"
],
"config": null,
"leaderboard_task_name": "Sentiment Analysis",
"metrics": [
{
"accuracy": 0.2707317073170732,
"f1_macro": 0.26321469558380844,
"f1_micro": 0.2707317073170732,
"f1_weighted": 0.27628920268832863,
"recall_macro": 0.2803374383302084,
"recall_micro": 0.2707317073170732,
"recall_weighted": 0.2707317073170732,
"precision_macro": 0.26682952745742244,
"precision_micro": 0.2707317073170732,
"precision_weighted": 0.3012629928300943,
"classes": {
"0": {
"precision": 0.18226600985221675,
"recall": 0.3135593220338983,
"f1": 0.23052959501557632,
"support": 118
},
"1": {
"precision": 0.38961038961038963,
"recall": 0.26548672566371684,
"f1": 0.3157894736842105,
"support": 339
},
"2": {
"precision": 0.2864864864864865,
"recall": 0.23348017621145375,
"f1": 0.2572815533980583,
"support": 227
},
"3": {
"precision": 0.208955223880597,
"recall": 0.3088235294117647,
"f1": 0.24925816023738873,
"support": 136
}
}
},
{
"accuracy": 0.22439024390243903,
"f1_macro": 0.21294902138982494,
"f1_micro": 0.22439024390243903,
"f1_weighted": 0.24030286334056883,
"recall_macro": 0.2195084944832831,
"recall_micro": 0.22439024390243903,
"recall_weighted": 0.22439024390243903,
"precision_macro": 0.23332179622411883,
"precision_micro": 0.22439024390243903,
"precision_weighted": 0.2879731607716613,
"classes": {
"0": {
"precision": 0.1145374449339207,
"recall": 0.22033898305084745,
"f1": 0.15072463768115943,
"support": 118
},
"1": {
"precision": 0.42934782608695654,
"recall": 0.23303834808259588,
"f1": 0.30210325047801145,
"support": 339
},
"2": {
"precision": 0.265,
"recall": 0.23348017621145375,
"f1": 0.24824355971896955,
"support": 227
},
"3": {
"precision": 0.12440191387559808,
"recall": 0.19117647058823528,
"f1": 0.15072463768115943,
"support": 136
}
}
},
{
"accuracy": 0.25,
"f1_macro": 0.24275437640503172,
"f1_micro": 0.25,
"f1_weighted": 0.25883890927696696,
"recall_macro": 0.2591245000460524,
"recall_micro": 0.25,
"recall_weighted": 0.25,
"precision_macro": 0.25787615946976955,
"precision_micro": 0.25,
"precision_weighted": 0.3033807816571067,
"classes": {
"0": {
"precision": 0.15021459227467812,
"recall": 0.2966101694915254,
"f1": 0.1994301994301994,
"support": 118
},
"1": {
"precision": 0.40804597701149425,
"recall": 0.20943952802359883,
"f1": 0.27680311890838205,
"support": 339
},
"2": {
"precision": 0.3116279069767442,
"recall": 0.29515418502202645,
"f1": 0.30316742081447967,
"support": 227
},
"3": {
"precision": 0.16161616161616163,
"recall": 0.23529411764705882,
"f1": 0.19161676646706588,
"support": 136
}
}
},
{
"accuracy": 0.22560975609756098,
"f1_macro": 0.2120784669389474,
"f1_micro": 0.22560975609756098,
"f1_weighted": 0.23774855341302614,
"recall_macro": 0.21755784862350558,
"recall_micro": 0.22560975609756098,
"recall_weighted": 0.22560975609756098,
"precision_macro": 0.22335565084380252,
"precision_micro": 0.22560975609756098,
"precision_weighted": 0.26888250066206376,
"classes": {
"0": {
"precision": 0.10952380952380952,
"recall": 0.19491525423728814,
"f1": 0.1402439024390244,
"support": 118
},
"1": {
"precision": 0.3761904761904762,
"recall": 0.23303834808259588,
"f1": 0.2877959927140255,
"support": 339
},
"2": {
"precision": 0.27014218009478674,
"recall": 0.2511013215859031,
"f1": 0.26027397260273977,
"support": 227
},
"3": {
"precision": 0.13756613756613756,
"recall": 0.19117647058823528,
"f1": 0.16,
"support": 136
}
}
},
{
"accuracy": 0.23780487804878048,
"f1_macro": 0.22578338654073793,
"f1_micro": 0.23780487804878048,
"f1_weighted": 0.24833186785701405,
"recall_macro": 0.23551622569493447,
"recall_micro": 0.23780487804878048,
"recall_weighted": 0.23780487804878048,
"precision_macro": 0.2362003959319496,
"precision_micro": 0.23780487804878048,
"precision_weighted": 0.2804571653307677,
"classes": {
"0": {
"precision": 0.14883720930232558,
"recall": 0.2711864406779661,
"f1": 0.1921921921921922,
"support": 118
},
"1": {
"precision": 0.38164251207729466,
"recall": 0.23303834808259588,
"f1": 0.2893772893772894,
"support": 339
},
"2": {
"precision": 0.2932692307692308,
"recall": 0.2687224669603524,
"f1": 0.28045977011494255,
"support": 227
},
"3": {
"precision": 0.12105263157894737,
"recall": 0.16911764705882354,
"f1": 0.1411042944785276,
"support": 136
}
}
}
],
"metrics_avg": {
"accuracy": 0.24170731707317072,
"f1_macro": 0.2313559893716701,
"f1_micro": 0.24170731707317072,
"f1_weighted": 0.2523022793151809,
"recall_macro": 0.2424089014355968,
"recall_micro": 0.24170731707317072,
"recall_weighted": 0.24170731707317072,
"precision_macro": 0.24351670598541258,
"precision_micro": 0.24170731707317072,
"precision_weighted": 0.28839132025033876,
"classes": {
"0": {
"precision": 0.14107581317739012,
"recall": 0.2593220338983051,
"f1": 0.18262410535163034,
"support": 118
},
"1": {
"precision": 0.39696743619532227,
"recall": 0.23480825958702067,
"f1": 0.2943738250323838,
"support": 339
},
"2": {
"precision": 0.28530516086544966,
"recall": 0.2563876651982379,
"f1": 0.269885255329838,
"support": 227
},
"3": {
"precision": 0.15071841370348832,
"recall": 0.21911764705882353,
"f1": 0.17854077177282832,
"support": 136
}
}
},
"metrics_median": {
"accuracy": 0.23780487804878048,
"f1_macro": 0.22578338654073793,
"f1_micro": 0.23780487804878048,
"f1_weighted": 0.24833186785701405,
"recall_macro": 0.23551622569493447,
"recall_micro": 0.23780487804878048,
"recall_weighted": 0.23780487804878048,
"precision_macro": 0.2362003959319496,
"precision_micro": 0.23780487804878048,
"precision_weighted": 0.2879731607716613,
"classes": {
"0": {
"precision": 0.14883720930232558,
"recall": 0.2711864406779661,
"f1": 0.1921921921921922
},
"1": {
"precision": 0.38961038961038963,
"recall": 0.23303834808259588,
"f1": 0.2893772893772894
},
"2": {
"precision": 0.2864864864864865,
"recall": 0.2511013215859031,
"f1": 0.26027397260273977
},
"3": {
"precision": 0.13756613756613756,
"recall": 0.19117647058823528,
"f1": 0.16
}
}
},
"metrics_std": {
"accuracy": 0.019270608073295843,
"f1_macro": 0.021716316631819502,
"f1_micro": 0.019270608073295843,
"f1_weighted": 0.015729439855574547,
"recall_macro": 0.026960608260889717,
"recall_micro": 0.019270608073295843,
"recall_weighted": 0.019270608073295843,
"precision_macro": 0.01812190850612583,
"precision_micro": 0.019270608073295843,
"precision_weighted": 0.014440252955113348,
"classes": {
"0": {
"precision": 0.02974980159131166,
"recall": 0.0503506806017388,
"f1": 0.037022245544002845
},
"1": {
"precision": 0.02174790344417702,
"recall": 0.019963332525886293,
"f1": 0.01496109107948936
},
"2": {
"precision": 0.018707782111451528,
"recall": 0.026136382333376784,
"f1": 0.022017697139155554
},
"3": {
"precision": 0.03624874525075996,
"recall": 0.05561079529761482,
"f1": 0.04384893965204796
}
}
},
"averaged_over": 5
}