{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "name": "Crowd-Kit-Categorical.ipynb",
      "provenance": [],
      "collapsed_sections": []
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "# Crowd-Kit: aggregating categorical crowd labels\n",
        "\n",
        "This notebook loads the `relevance-2` dataset with Crowd-Kit, aggregates the crowd labels with `MajorityVote`, `Wawa` and `DawidSkene`, and scores each result against the ground-truth labels with F1."
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## Setup"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "KtiDQZCyr3xx"
      },
      "source": [
        "%%capture\n",
        "# %pip (rather than !pip) installs into the environment of the running kernel.\n",
        "# NOTE(review): the version is unpinned - consider pinning the crowd-kit release these outputs were produced with.\n",
        "%pip install crowd-kit"
      ],
      "execution_count": 1,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "26Ov-2mHsxek"
      },
      "source": [
        "from crowdkit.aggregation import MajorityVote, Wawa, DawidSkene\n",
        "from crowdkit.datasets import load_dataset\n",
        "from sklearn.metrics import f1_score"
      ],
      "execution_count": 2,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## Load the data\n",
        "\n",
        "`load_dataset` returns the crowd labels (`df`) and the ground-truth labels (`df_gt`)."
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "UteIznTotSeH",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "3685dfa8-2fb0-46be-ba31-5bf712890645"
      },
      "source": [
        "df, df_gt = load_dataset('relevance-2')"
      ],
      "execution_count": 3,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "Downloading relevance-2 from remote\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 419
        },
        "id": "TwGUO-30dw9S",
        "outputId": "5f8de55b-e161-450f-f530-de4db30c8527"
      },
      "source": [
        "df"
      ],
      "execution_count": 4,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>performer</th>\n",
              "      <th>task</th>\n",
              "      <th>label</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>w851</td>\n",
              "      <td>t30685</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>w6991</td>\n",
              "      <td>t30008</td>\n",
              "      <td>0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>w2596</td>\n",
              "      <td>t36316</td>\n",
              "      <td>0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>w5507</td>\n",
              "      <td>t15145</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>w2982</td>\n",
              "      <td>t44785</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>...</th>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>475531</th>\n",
              "      <td>w4660</td>\n",
              "      <td>t62250</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>475532</th>\n",
              "      <td>w6630</td>\n",
              "      <td>t46626</td>\n",
              "      <td>0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>475533</th>\n",
              "      <td>w4605</td>\n",
              "      <td>t93513</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>475534</th>\n",
              "      <td>w1928</td>\n",
              "      <td>t29002</td>\n",
              "      <td>0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>475535</th>\n",
              "      <td>w5375</td>\n",
              "      <td>t49052</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "<p>475536 rows × 3 columns</p>\n",
              "</div>"
            ],
            "text/plain": [
              " performer task label\n",
              "0 w851 t30685 1\n",
              "1 w6991 t30008 0\n",
              "2 w2596 t36316 0\n",
              "3 w5507 t15145 1\n",
              "4 w2982 t44785 1\n",
              "... ... ... ...\n",
              "475531 w4660 t62250 1\n",
              "475532 w6630 t46626 0\n",
              "475533 w4605 t93513 1\n",
              "475534 w1928 t29002 0\n",
              "475535 w5375 t49052 1\n",
              "\n",
              "[475536 rows x 3 columns]"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 4
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "UU-OlcgVtWzC",
        "outputId": "287bf9ad-b869-4244-f18e-5d843695717f"
      },
      "source": [
        "df_gt"
      ],
      "execution_count": 5,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "task\n",
              "t30006 0\n",
              "t33578 0\n",
              "t22462 1\n",
              "t52093 0\n",
              "t26935 0\n",
              " ..\n",
              "t57345 1\n",
              "t81052 1\n",
              "t7189 1\n",
              "t80463 0\n",
              "t93643 0\n",
              "Name: label, Length: 10079, dtype: int64"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 5
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## Aggregate the labels\n",
        "\n",
        "Each aggregator is fitted on `df` and returns one label per task."
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "39KwBx57uvAB",
        "outputId": "2e9dda94-f7e2-4301-d742-067471067c3f"
      },
      "source": [
        "agg_mv = MajorityVote().fit_predict(df)\n",
        "agg_mv"
      ],
      "execution_count": 7,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "task\n",
              "t0 1\n",
              "t1 1\n",
              "t10 1\n",
              "t100 0\n",
              "t1000 0\n",
              " ..\n",
              "t9995 1\n",
              "t9996 0\n",
              "t9997 0\n",
              "t9998 0\n",
              "t9999 1\n",
              "Length: 99319, dtype: int64"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 7
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "dr2JjXdSu6aw",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "be35d68e-59cd-4452-96dd-1b563c228252"
      },
      "source": [
        "agg_wawa = Wawa().fit_predict(df)\n",
        "agg_wawa"
      ],
      "execution_count": 8,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "task\n",
              "t0 1\n",
              "t1 1\n",
              "t10 1\n",
              "t100 0\n",
              "t1000 0\n",
              " ..\n",
              "t9995 1\n",
              "t9996 0\n",
              "t9997 0\n",
              "t9998 0\n",
              "t9999 1\n",
              "Length: 99319, dtype: int64"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 8
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "t1l9tf6gu0OV",
        "outputId": "d9467551-d852-4c3d-c7ce-0099ad7f2a31"
      },
      "source": [
        "agg_ds = DawidSkene(n_iter=10).fit_predict(df)\n",
        "agg_ds"
      ],
      "execution_count": 9,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "task\n",
              "t30685 1\n",
              "t30008 0\n",
              "t36316 0\n",
              "t15145 1\n",
              "t44785 0\n",
              " ..\n",
              "t95222 0\n",
              "t83525 0\n",
              "t49227 0\n",
              "t96106 1\n",
              "t16185 1\n",
              "Length: 99319, dtype: int64"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 9
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## Evaluate against the ground truth\n",
        "\n",
        "F1 is computed on the tasks listed in `df_gt`; each aggregated Series is indexed with `df_gt.index` so that the two label vectors line up."
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "QbsLakzgenS2",
        "outputId": "7a138f21-4881-4d9a-e572-6af456f8b67f"
      },
      "source": [
        "f1_score(df_gt, agg_mv[df_gt.index])"
      ],
      "execution_count": 11,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "0.7621861152141802"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 11
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "SbwUVeuslX5r",
        "outputId": "f21f3276-ee73-440e-9648-7d5b0599a0a8"
      },
      "source": [
        "f1_score(df_gt, agg_wawa[df_gt.index])"
      ],
      "execution_count": 12,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "0.7610675039246467"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 12
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "-9WDMVBpej3s",
        "outputId": "28f57ede-5ec1-4820-e767-73f64e462d87"
      },
      "source": [
        "f1_score(df_gt, agg_ds[df_gt.index])"
      ],
      "execution_count": 13,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "0.7883762200532387"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 13
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## Save the Dawid-Skene labels\n",
        "\n",
        "The aggregated labels are written as CSV (index `task`, column `label`) to `test.txt`."
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "ypxZq3OzlmOj"
      },
      "source": [
        "agg_ds.to_frame('label').to_csv('test.txt')"
      ],
      "execution_count": 14,
      "outputs": []
    }
  ]
}