Submission

Example submission to the LEPISZCZE benchmark
import datasets
import numpy as np

from embeddings.evaluator.evaluation_results import Predictions
from embeddings.evaluator.leaderboard import get_dataset_task
from embeddings.evaluator.submission import AveragedSubmission
from embeddings.utils.utils import get_installed_packages

It is important to note that we not only make it easy to train models, but we also provide many helpers for creating a submission to the leaderboard.

We start with a couple of names.

# Dataset identifier that is passed to `datasets.load_dataset` and recorded
# in the submission as `dataset_name`.
DATASET_NAME = "clarin-pl/polemo2-official"
# Name of the dataset column that is read as the ground-truth labels.
TARGET_COLUMN_NAME = "target"

We also want to gather all hyperparameters for each submission. Here we collect a few example parameters for presentation purposes.

# Placeholder hyperparameter names and values; this mapping is passed as
# `hparams` when the submission is created.
hparams = {"hparam_name_1": 0.2, "hparam_name_2": 0.1}

We do the same with Python packages. We can use one of the helper methods.

# Collect the installed packages as "name==version" strings.
packages = get_installed_packages()
# Display only the first ten entries to keep the output short.
packages[:10]
['absl-py==1.4.0',
 'aiofiles==22.1.0',
 'aiohttp==3.8.4',
 'aiosignal==1.3.1',
 'aiosqlite==0.18.0',
 'alembic==1.9.3',
 'anyio==3.6.2',
 'appdirs==1.4.4',
 'argon2-cffi-bindings==21.2.0',
 'argon2-cffi==21.3.0']

The next step is related to datasets and predictions.

# No config name is given, so the dataset's default config is loaded.
dataset = datasets.load_dataset(DATASET_NAME)
# Display the available splits together with their columns and sizes.
dataset
No config specified, defaulting to: polemo2-official/all_text
Found cached dataset polemo2-official (/root/.cache/huggingface/datasets/clarin-pl___polemo2-official/all_text/0.0.0/2b75fdbe5def97538e81fb120f8752744b50729a4ce09bd75132bfc863a2fd70)
100%|██████████| 3/3 [00:00<00:00, 828.48it/s]
DatasetDict({
    train: Dataset({
        features: ['text', 'target'],
        num_rows: 6573
    })
    validation: Dataset({
        features: ['text', 'target'],
        num_rows: 823
    })
    test: Dataset({
        features: ['text', 'target'],
        num_rows: 820
    })
})
# Ground-truth labels of the test split, converted to a NumPy array.
y_true = np.array(dataset["test"][TARGET_COLUMN_NAME])
# Peek at the first ten labels.
y_true[:10]
array([1, 2, 2, 2, 2, 0, 0, 0, 1, 3])

It is important to note that we do not store just a single prediction for each object: we also want to calculate standard deviations for each object, hence we need more than one prediction.

# Stand-in for five independent model runs: every run receives its own
# random labels drawn from {0, 1, 2, 3}, one label per test example.
predictions = []
for run_index in range(5):
    random_labels = np.random.randint(low=0, high=4, size=len(y_true))
    predictions.append(Predictions(y_true=y_true, y_pred=random_labels))

Finally, we can create a submission that gathers all of this information.

# Resolve the values derived from the dataset first, then assemble the
# submission from them and from the objects prepared earlier.
dataset_version = dataset["train"].info.version.version_str
leaderboard_task = get_dataset_task(DATASET_NAME)

submission = AveragedSubmission.from_predictions(
    # Replace the two names below with your own submission and embedding.
    submission_name="my-great-submission",
    embedding_name="my-great-model",
    dataset_name=DATASET_NAME,
    dataset_version=dataset_version,
    task=leaderboard_task,
    predictions=predictions,
    hparams=hparams,
    packages=packages,
)

We can even save our submission.

# Write the submission as JSON to my-great-submission/my-great-model.json.
submission.save_json(
    root="my-great-submission",
    filename="my-great-model.json",
    compress=False,
)
!ls my-great-submission
my-great-model.json  my-great-submission_predictions.json
!cat my-great-submission/my-great-model.json
{
  "submission_name": "my-great-submission",
  "dataset_name": "clarin-pl/polemo2-official",
  "dataset_version": "0.0.0",
  "embedding_name": "my-great-model",
  "hparams": {
    "hparam_name_1": 0.2,
    "hparam_name_2": 0.1
  },
  "packages": [
    "absl-py==1.4.0",
    "aiofiles==22.1.0",
    "aiohttp==3.8.4",
    "aiosignal==1.3.1",
    "aiosqlite==0.18.0",
    "alembic==1.9.3",
    "anyio==3.6.2",
    "appdirs==1.4.4",
    "argon2-cffi-bindings==21.2.0",
    "argon2-cffi==21.3.0",
    "arrow==1.2.3",
    "asttokens==2.2.1",
    "astunparse==1.6.3",
    "async-timeout==4.0.2",
    "attrs==22.2.0",
    "babel==2.11.0",
    "backcall==0.2.0",
    "beautifulsoup4==4.11.2",
    "black==21.12b0",
    "bleach==6.0.0",
    "cachetools==5.3.0",
    "catalogue==2.0.8",
    "certifi==2022.12.7",
    "cffi==1.15.1",
    "charset-normalizer==3.0.1",
    "click==8.0.4",
    "cmaes==0.9.1",
    "colorlog==6.7.0",
    "comm==0.1.2",
    "contourpy==1.0.7",
    "coverage==6.2",
    "cycler==0.11.0",
    "datasets==2.9.0",
    "debugpy==1.6.6",
    "decorator==5.1.1",
    "defusedxml==0.7.1",
    "dill==0.3.6",
    "docker-pycreds==0.4.0",
    "execnb==0.1.5",
    "executing==1.2.0",
    "fastcore==1.5.28",
    "fastjsonschema==2.16.2",
    "filelock==3.9.0",
    "fonttools==4.38.0",
    "fqdn==1.5.1",
    "frozenlist==1.3.3",
    "fsspec==2023.1.0",
    "future==0.18.3",
    "ghapi==1.0.3",
    "gitdb==4.0.10",
    "gitpython==3.1.30",
    "google-auth-oauthlib==0.4.6",
    "google-auth==2.16.0",
    "greenlet==2.0.2",
    "grpcio==1.51.1",
    "huggingface-hub==0.12.0",
    "idna==3.4",
    "importlib-metadata==6.0.0",
    "iniconfig==2.0.0",
    "ipykernel==6.21.2",
    "ipython-genutils==0.2.0",
    "ipython==8.10.0",
    "isoduration==20.11.0",
    "isort==5.10.1",
    "jedi==0.18.2",
    "jinja2==3.1.2",
    "joblib==1.2.0",
    "json5==0.9.11",
    "jsonpointer==2.3",
    "jsonschema==4.17.3",
    "jupyter-client==8.0.2",
    "jupyter-core==5.2.0",
    "jupyter-events==0.5.0",
    "jupyter-server-fileid==0.6.0",
    "jupyter-server-terminals==0.4.4",
    "jupyter-server-ydoc==0.6.1",
    "jupyter-server==2.2.1",
    "jupyter-ydoc==0.2.2",
    "jupyterlab-pygments==0.2.2",
    "jupyterlab-server==2.19.0",
    "jupyterlab==3.6.1",
    "kiwisolver==1.4.4",
    "mako==1.2.4",
    "markdown==3.4.1",
    "markupsafe==2.1.2",
    "matplotlib-inline==0.1.6",
    "matplotlib==3.6.3",
    "mistune==2.0.5",
    "multidict==6.0.4",
    "multiprocess==0.70.14",
    "mypy-extensions==1.0.0",
    "mypy==0.991",
    "nbclassic==0.5.1",
    "nbclient==0.7.2",
    "nbconvert==7.2.9",
    "nbdev==2.3.11",
    "nbformat==5.7.3",
    "nest-asyncio==1.5.6",
    "notebook-shim==0.2.2",
    "notebook==6.5.2",
    "numpy==1.23.4",
    "oauthlib==3.2.2",
    "optuna==3.1.0",
    "packaging==23.0",
    "pandas==1.5.3",
    "pandocfilters==1.5.0",
    "parso==0.8.3",
    "pastel==0.2.1",
    "pathspec==0.11.0",
    "pathtools==0.1.2",
    "pexpect==4.8.0",
    "pickleshare==0.7.5",
    "pillow==9.4.0",
    "pip==23.0",
    "platformdirs==3.0.0",
    "pluggy==1.0.0",
    "poethepoet==0.11.0",
    "prometheus-client==0.16.0",
    "prompt-toolkit==3.0.36",
    "protobuf==4.21.12",
    "psutil==5.9.4",
    "ptyprocess==0.7.0",
    "pure-eval==0.2.2",
    "py==1.11.0",
    "pyarrow==11.0.0",
    "pyasn1-modules==0.2.8",
    "pyasn1==0.4.8",
    "pycparser==2.21",
    "pydantic==1.10.4",
    "pydeprecate==0.3.1",
    "pyflakes==2.4.0",
    "pygments==2.14.0",
    "pyparsing==3.0.9",
    "pyrsistent==0.19.3",
    "pytest-env==0.6.2",
    "pytest==6.2.5",
    "python-dateutil==2.8.2",
    "python-json-logger==2.0.5",
    "pytorch-lightning==1.5.4",
    "pytz==2022.7.1",
    "pyyaml==6.0",
    "pyzmq==25.0.0",
    "regex==2022.10.31",
    "requests-oauthlib==1.3.1",
    "requests==2.28.2",
    "responses==0.18.0",
    "rfc3339-validator==0.1.4",
    "rfc3986-validator==0.1.1",
    "rsa==4.9",
    "sacremoses==0.0.53",
    "scikit-learn==1.2.1",
    "scipy==1.9.3",
    "seaborn==0.12.2",
    "send2trash==1.8.0",
    "sentry-sdk==1.15.0",
    "seqeval==1.2.2",
    "setproctitle==1.3.2",
    "setuptools==65.7.0",
    "six==1.16.0",
    "smmap==5.0.0",
    "sniffio==1.3.0",
    "soupsieve==2.3.2.post1",
    "sqlalchemy==2.0.3",
    "srsly==2.4.5",
    "stack-data==0.6.2",
    "tensorboard-data-server==0.7.0",
    "tensorboard-plugin-wit==1.8.1",
    "tensorboard==2.12.0",
    "terminado==0.17.1",
    "threadpoolctl==3.1.0",
    "tinycss2==1.2.1",
    "tokenizers==0.13.2",
    "toml==0.10.2",
    "tomli==1.2.3",
    "torch==1.12.1+cu113",
    "torchaudio==0.12.1+cu113",
    "torchmetrics==0.11.1",
    "torchvision==0.13.1+cu113",
    "tornado==6.2",
    "tqdm==4.64.1",
    "traitlets==5.9.0",
    "transformers==4.26.1",
    "typer==0.7.0",
    "types-docutils==0.19.1.3",
    "types-pyyaml==6.0.12.6",
    "types-requests==2.26.1",
    "types-setuptools==67.2.0.1",
    "typing-extensions==4.4.0",
    "uri-template==1.2.0",
    "urllib3==1.26.14",
    "wandb==0.13.10",
    "watchdog==2.2.1",
    "wcwidth==0.2.6",
    "webcolors==1.12",
    "webencodings==0.5.1",
    "websocket-client==1.5.1",
    "werkzeug==2.2.2",
    "wheel==0.38.4",
    "xgboost==1.7.3",
    "xxhash==3.2.0",
    "y-py==0.5.5",
    "yarl==1.8.2",
    "ypy-websocket==0.8.2",
    "zipp==3.13.0"
  ],
  "config": null,
  "leaderboard_task_name": "Sentiment Analysis",
  "metrics": [
    {
      "accuracy": 0.2707317073170732,
      "f1_macro": 0.26321469558380844,
      "f1_micro": 0.2707317073170732,
      "f1_weighted": 0.27628920268832863,
      "recall_macro": 0.2803374383302084,
      "recall_micro": 0.2707317073170732,
      "recall_weighted": 0.2707317073170732,
      "precision_macro": 0.26682952745742244,
      "precision_micro": 0.2707317073170732,
      "precision_weighted": 0.3012629928300943,
      "classes": {
        "0": {
          "precision": 0.18226600985221675,
          "recall": 0.3135593220338983,
          "f1": 0.23052959501557632,
          "support": 118
        },
        "1": {
          "precision": 0.38961038961038963,
          "recall": 0.26548672566371684,
          "f1": 0.3157894736842105,
          "support": 339
        },
        "2": {
          "precision": 0.2864864864864865,
          "recall": 0.23348017621145375,
          "f1": 0.2572815533980583,
          "support": 227
        },
        "3": {
          "precision": 0.208955223880597,
          "recall": 0.3088235294117647,
          "f1": 0.24925816023738873,
          "support": 136
        }
      }
    },
    {
      "accuracy": 0.22439024390243903,
      "f1_macro": 0.21294902138982494,
      "f1_micro": 0.22439024390243903,
      "f1_weighted": 0.24030286334056883,
      "recall_macro": 0.2195084944832831,
      "recall_micro": 0.22439024390243903,
      "recall_weighted": 0.22439024390243903,
      "precision_macro": 0.23332179622411883,
      "precision_micro": 0.22439024390243903,
      "precision_weighted": 0.2879731607716613,
      "classes": {
        "0": {
          "precision": 0.1145374449339207,
          "recall": 0.22033898305084745,
          "f1": 0.15072463768115943,
          "support": 118
        },
        "1": {
          "precision": 0.42934782608695654,
          "recall": 0.23303834808259588,
          "f1": 0.30210325047801145,
          "support": 339
        },
        "2": {
          "precision": 0.265,
          "recall": 0.23348017621145375,
          "f1": 0.24824355971896955,
          "support": 227
        },
        "3": {
          "precision": 0.12440191387559808,
          "recall": 0.19117647058823528,
          "f1": 0.15072463768115943,
          "support": 136
        }
      }
    },
    {
      "accuracy": 0.25,
      "f1_macro": 0.24275437640503172,
      "f1_micro": 0.25,
      "f1_weighted": 0.25883890927696696,
      "recall_macro": 0.2591245000460524,
      "recall_micro": 0.25,
      "recall_weighted": 0.25,
      "precision_macro": 0.25787615946976955,
      "precision_micro": 0.25,
      "precision_weighted": 0.3033807816571067,
      "classes": {
        "0": {
          "precision": 0.15021459227467812,
          "recall": 0.2966101694915254,
          "f1": 0.1994301994301994,
          "support": 118
        },
        "1": {
          "precision": 0.40804597701149425,
          "recall": 0.20943952802359883,
          "f1": 0.27680311890838205,
          "support": 339
        },
        "2": {
          "precision": 0.3116279069767442,
          "recall": 0.29515418502202645,
          "f1": 0.30316742081447967,
          "support": 227
        },
        "3": {
          "precision": 0.16161616161616163,
          "recall": 0.23529411764705882,
          "f1": 0.19161676646706588,
          "support": 136
        }
      }
    },
    {
      "accuracy": 0.22560975609756098,
      "f1_macro": 0.2120784669389474,
      "f1_micro": 0.22560975609756098,
      "f1_weighted": 0.23774855341302614,
      "recall_macro": 0.21755784862350558,
      "recall_micro": 0.22560975609756098,
      "recall_weighted": 0.22560975609756098,
      "precision_macro": 0.22335565084380252,
      "precision_micro": 0.22560975609756098,
      "precision_weighted": 0.26888250066206376,
      "classes": {
        "0": {
          "precision": 0.10952380952380952,
          "recall": 0.19491525423728814,
          "f1": 0.1402439024390244,
          "support": 118
        },
        "1": {
          "precision": 0.3761904761904762,
          "recall": 0.23303834808259588,
          "f1": 0.2877959927140255,
          "support": 339
        },
        "2": {
          "precision": 0.27014218009478674,
          "recall": 0.2511013215859031,
          "f1": 0.26027397260273977,
          "support": 227
        },
        "3": {
          "precision": 0.13756613756613756,
          "recall": 0.19117647058823528,
          "f1": 0.16,
          "support": 136
        }
      }
    },
    {
      "accuracy": 0.23780487804878048,
      "f1_macro": 0.22578338654073793,
      "f1_micro": 0.23780487804878048,
      "f1_weighted": 0.24833186785701405,
      "recall_macro": 0.23551622569493447,
      "recall_micro": 0.23780487804878048,
      "recall_weighted": 0.23780487804878048,
      "precision_macro": 0.2362003959319496,
      "precision_micro": 0.23780487804878048,
      "precision_weighted": 0.2804571653307677,
      "classes": {
        "0": {
          "precision": 0.14883720930232558,
          "recall": 0.2711864406779661,
          "f1": 0.1921921921921922,
          "support": 118
        },
        "1": {
          "precision": 0.38164251207729466,
          "recall": 0.23303834808259588,
          "f1": 0.2893772893772894,
          "support": 339
        },
        "2": {
          "precision": 0.2932692307692308,
          "recall": 0.2687224669603524,
          "f1": 0.28045977011494255,
          "support": 227
        },
        "3": {
          "precision": 0.12105263157894737,
          "recall": 0.16911764705882354,
          "f1": 0.1411042944785276,
          "support": 136
        }
      }
    }
  ],
  "metrics_avg": {
    "accuracy": 0.24170731707317072,
    "f1_macro": 0.2313559893716701,
    "f1_micro": 0.24170731707317072,
    "f1_weighted": 0.2523022793151809,
    "recall_macro": 0.2424089014355968,
    "recall_micro": 0.24170731707317072,
    "recall_weighted": 0.24170731707317072,
    "precision_macro": 0.24351670598541258,
    "precision_micro": 0.24170731707317072,
    "precision_weighted": 0.28839132025033876,
    "classes": {
      "0": {
        "precision": 0.14107581317739012,
        "recall": 0.2593220338983051,
        "f1": 0.18262410535163034,
        "support": 118
      },
      "1": {
        "precision": 0.39696743619532227,
        "recall": 0.23480825958702067,
        "f1": 0.2943738250323838,
        "support": 339
      },
      "2": {
        "precision": 0.28530516086544966,
        "recall": 0.2563876651982379,
        "f1": 0.269885255329838,
        "support": 227
      },
      "3": {
        "precision": 0.15071841370348832,
        "recall": 0.21911764705882353,
        "f1": 0.17854077177282832,
        "support": 136
      }
    }
  },
  "metrics_median": {
    "accuracy": 0.23780487804878048,
    "f1_macro": 0.22578338654073793,
    "f1_micro": 0.23780487804878048,
    "f1_weighted": 0.24833186785701405,
    "recall_macro": 0.23551622569493447,
    "recall_micro": 0.23780487804878048,
    "recall_weighted": 0.23780487804878048,
    "precision_macro": 0.2362003959319496,
    "precision_micro": 0.23780487804878048,
    "precision_weighted": 0.2879731607716613,
    "classes": {
      "0": {
        "precision": 0.14883720930232558,
        "recall": 0.2711864406779661,
        "f1": 0.1921921921921922
      },
      "1": {
        "precision": 0.38961038961038963,
        "recall": 0.23303834808259588,
        "f1": 0.2893772893772894
      },
      "2": {
        "precision": 0.2864864864864865,
        "recall": 0.2511013215859031,
        "f1": 0.26027397260273977
      },
      "3": {
        "precision": 0.13756613756613756,
        "recall": 0.19117647058823528,
        "f1": 0.16
      }
    }
  },
  "metrics_std": {
    "accuracy": 0.019270608073295843,
    "f1_macro": 0.021716316631819502,
    "f1_micro": 0.019270608073295843,
    "f1_weighted": 0.015729439855574547,
    "recall_macro": 0.026960608260889717,
    "recall_micro": 0.019270608073295843,
    "recall_weighted": 0.019270608073295843,
    "precision_macro": 0.01812190850612583,
    "precision_micro": 0.019270608073295843,
    "precision_weighted": 0.014440252955113348,
    "classes": {
      "0": {
        "precision": 0.02974980159131166,
        "recall": 0.0503506806017388,
        "f1": 0.037022245544002845
      },
      "1": {
        "precision": 0.02174790344417702,
        "recall": 0.019963332525886293,
        "f1": 0.01496109107948936
      },
      "2": {
        "precision": 0.018707782111451528,
        "recall": 0.026136382333376784,
        "f1": 0.022017697139155554
      },
      "3": {
        "precision": 0.03624874525075996,
        "recall": 0.05561079529761482,
        "f1": 0.04384893965204796
      }
    }
  },
  "averaged_over": 5
}