{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [] }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" }, "widgets": { "application/vnd.jupyter.widget-state+json": { "b3a0c784e41442fc9a46143ff6365935": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_911aa9db468f4e34bbc4088a121dabf5", "IPY_MODEL_532552c233314f6f8b771a0dfea31822", "IPY_MODEL_6da6742541c14da98fee74bc8d50358c" ], "layout": "IPY_MODEL_17cba0e6ac22440181c5223869c4e089" } }, "911aa9db468f4e34bbc4088a121dabf5": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_76e5ed20ea004fdd8e7ef110766d94d4", "placeholder": "", "style": "IPY_MODEL_f659708b1afc4101ae91aa11585cf624", "value": "Downloading readme: 100%" } }, "532552c233314f6f8b771a0dfea31822": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_31b2a25be6914e178b8a132f7f501ba6", "max": 484, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_94578cb824b141a2b824afeacc56823e", "value": 484 } }, "6da6742541c14da98fee74bc8d50358c": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_f3c9a63e57ea4942b583fc37ac70c387", "placeholder": "", "style": "IPY_MODEL_a04484c35f794b7b8e37ba3cf6b4ab4c", "value": " 484/484 [00:00<00:00, 7.24kB/s]" } }, "17cba0e6ac22440181c5223869c4e089": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "76e5ed20ea004fdd8e7ef110766d94d4": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "f659708b1afc4101ae91aa11585cf624": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "31b2a25be6914e178b8a132f7f501ba6": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "94578cb824b141a2b824afeacc56823e": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "f3c9a63e57ea4942b583fc37ac70c387": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "a04484c35f794b7b8e37ba3cf6b4ab4c": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "e99c5430deb44230a3c64916c5c73e55": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_b9707b0983a047528ddc9931d26d347c", "IPY_MODEL_4ecfd157e6ef4a38b9ec0274174abf2f", "IPY_MODEL_4fdf33feadd64e6eac25565dab2a3837" ], "layout": "IPY_MODEL_48ed1d8d3f13486193498273798030bd" } }, "b9707b0983a047528ddc9931d26d347c": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_e490e2aee92e4ecc940c51ad635beb2a", "placeholder": "", "style": "IPY_MODEL_dfa8c4652a88435ab90f154c26688cb6", "value": "Downloading data: 100%" } }, "4ecfd157e6ef4a38b9ec0274174abf2f": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_7525ae6738e64e668b1d7e51fca30e9a", "max": 806606, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_35d0952d7bc6426d83f1e4918de9b92d", "value": 806606 } }, "4fdf33feadd64e6eac25565dab2a3837": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_b64d9641e01f4ff1bc3c566a9a1f771a", "placeholder": "", "style": "IPY_MODEL_da07bafbe3a141518e95d91b177abb0a", "value": " 807k/807k [00:01<00:00, 773kB/s]" } }, "48ed1d8d3f13486193498273798030bd": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "e490e2aee92e4ecc940c51ad635beb2a": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "dfa8c4652a88435ab90f154c26688cb6": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "7525ae6738e64e668b1d7e51fca30e9a": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "35d0952d7bc6426d83f1e4918de9b92d": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "b64d9641e01f4ff1bc3c566a9a1f771a": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "da07bafbe3a141518e95d91b177abb0a": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "90cbd5f3325241c382399f074784a401": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_73f453a4612a472d9d72c090272899d4", "IPY_MODEL_779bb444302d4d1e9e7e9cd49d530f1f", "IPY_MODEL_68cb933b8ff2460cac3059d905d07f2e" ], "layout": "IPY_MODEL_b5270b6efba84c3caad585c702708e58" } }, "73f453a4612a472d9d72c090272899d4": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_a14904d2b82844559d5a4624a8c8c0ee", "placeholder": "", "style": "IPY_MODEL_31202c67e0ec4ef4bb3802b7b4159bac", "value": "Generating train split: 100%" } }, "779bb444302d4d1e9e7e9cd49d530f1f": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_ceec7a3a99f3464ea9c795c4a34bd93d", "max": 68, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_fdff39c9b62e492fb7b5723757ed9ec9", "value": 68 } }, "68cb933b8ff2460cac3059d905d07f2e": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_6754089ca7b64c808a7f33caccd50f63", "placeholder": "", "style": "IPY_MODEL_4a5a2a18a75f4cdd896793d5da86944b", "value": " 68/68 [00:00<00:00, 518.62 examples/s]" } }, "b5270b6efba84c3caad585c702708e58": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "a14904d2b82844559d5a4624a8c8c0ee": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "31202c67e0ec4ef4bb3802b7b4159bac": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "ceec7a3a99f3464ea9c795c4a34bd93d": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "fdff39c9b62e492fb7b5723757ed9ec9": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "6754089ca7b64c808a7f33caccd50f63": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "4a5a2a18a75f4cdd896793d5da86944b": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } } } } }, "cells": [ { "cell_type": "code", "source": [ "!pip install datasets wiktionaryparser -Uq" ], "metadata": { "id": "G2QYwT0NIUMk", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "7c8c3a74-dff9-4776-b463-b4184ad669aa" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m542.0/542.0 kB\u001b[0m \u001b[31m3.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m116.3/116.3 kB\u001b[0m \u001b[31m5.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.1/194.1 kB\u001b[0m \u001b[31m7.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m7.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25h" ] } ] }, { "cell_type": "markdown", "source": [ "# Language Hacking: Using `spaCy` for Morphosyntactic Analysis\n", "\n", "This workshop is inspired by [this article](https://hdsr.mitpress.mit.edu/pub/owxwohyz/release/6) written by Tufts professor, Dr. Gregory Crane. Crane teaches in the Classical Studies department, meaning that he studies historical languages like Ancient Greek and Latin, so you might be wondering how natural language processing could be relevent to such a discipline. The answer is in *language hacking*, the processing of taking a language which you may or may not know and using pretrained language models to give you a deeper understanding of the text.\n", "\n", "Professor Crane is a 'digital philologist.' Philology (from φιλολογία, the \"love of words\") is the study of language in historical sources, which can include everything from ancient literature to contemporary song lyrics. As a result, philologists are interested in many different languages, especially sources in many languages, but no one, no matter how good a philologist, can learn every language they might be interested in.\n", "\n", "This is where language hacking comes in. Deep learning models like the ones we'll play around with today offer new opportunities for research, language-learning and cross-cultural exchange.\n", "\n", "## What is `spaCy`?\n", "Above I mentioned that we would be using a language model to tell us about the meaning of words in languages we don't know. In this lesson, we'll downlaod these models from a Python package called `spaCy`. `spaCy` is very powerful package with a lot of functionality. We'll only be using a small part of what they offer: their pretrained language models." ], "metadata": { "id": "ShdEWuSriY0m" } }, { "cell_type": "markdown", "source": [ "# Data and Model Preparation\n", "\n", "Before we can start language hacking, we need to set up our texts and models. For this example, I'll be using the original French version of Alexandre Dumas' *The Three Muskeeters* (*Les trois mousquetaires*). As a result, we'll also be using the French `spaCy` model, which we'll need to download. " ], "metadata": { "id": "GI6oukc0-CBC" } }, { "cell_type": "code", "source": [ "from datasets import load_dataset\n", "\n", "dataset = load_dataset(\"pnadel/les_trois_mousquetaires\")\n", "data = dataset[\"train\"].to_pandas()\n", "data" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 624, "referenced_widgets": [ "b3a0c784e41442fc9a46143ff6365935", "911aa9db468f4e34bbc4088a121dabf5", "532552c233314f6f8b771a0dfea31822", "6da6742541c14da98fee74bc8d50358c", "17cba0e6ac22440181c5223869c4e089", "76e5ed20ea004fdd8e7ef110766d94d4", "f659708b1afc4101ae91aa11585cf624", "31b2a25be6914e178b8a132f7f501ba6", "94578cb824b141a2b824afeacc56823e", "f3c9a63e57ea4942b583fc37ac70c387", "a04484c35f794b7b8e37ba3cf6b4ab4c", "e99c5430deb44230a3c64916c5c73e55", "b9707b0983a047528ddc9931d26d347c", "4ecfd157e6ef4a38b9ec0274174abf2f", "4fdf33feadd64e6eac25565dab2a3837", "48ed1d8d3f13486193498273798030bd", "e490e2aee92e4ecc940c51ad635beb2a", "dfa8c4652a88435ab90f154c26688cb6", "7525ae6738e64e668b1d7e51fca30e9a", "35d0952d7bc6426d83f1e4918de9b92d", "b64d9641e01f4ff1bc3c566a9a1f771a", "da07bafbe3a141518e95d91b177abb0a", "90cbd5f3325241c382399f074784a401", "73f453a4612a472d9d72c090272899d4", "779bb444302d4d1e9e7e9cd49d530f1f", "68cb933b8ff2460cac3059d905d07f2e", "b5270b6efba84c3caad585c702708e58", "a14904d2b82844559d5a4624a8c8c0ee", "31202c67e0ec4ef4bb3802b7b4159bac", "ceec7a3a99f3464ea9c795c4a34bd93d", "fdff39c9b62e492fb7b5723757ed9ec9", "6754089ca7b64c808a7f33caccd50f63", "4a5a2a18a75f4cdd896793d5da86944b" ] }, "id": "hOClsNfIiZLV", "outputId": "4bb4316d-5b79-4049-a076-3e73584713c3" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stderr", "text": [ "/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_token.py:89: UserWarning: \n", "The secret `HF_TOKEN` does not exist in your Colab secrets.\n", "To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n", "You will be able to reuse this secret in all of your notebooks.\n", "Please note that authentication is recommended but still optional to access public models or datasets.\n", " warnings.warn(\n" ] }, { "output_type": "display_data", "data": { "text/plain": [ "Downloading readme: 0%| | 0.00/484 [00:00, ?B/s]" ], "application/vnd.jupyter.widget-view+json": { "version_major": 2, "version_minor": 0, "model_id": "b3a0c784e41442fc9a46143ff6365935" } }, "metadata": {} }, { "output_type": "display_data", "data": { "text/plain": [ "Downloading data: 0%| | 0.00/807k [00:00, ?B/s]" ], "application/vnd.jupyter.widget-view+json": { "version_major": 2, "version_minor": 0, "model_id": "e99c5430deb44230a3c64916c5c73e55" } }, "metadata": {} }, { "output_type": "display_data", "data": { "text/plain": [ "Generating train split: 0%| | 0/68 [00:00, ? examples/s]" ], "application/vnd.jupyter.widget-view+json": { "version_major": 2, "version_minor": 0, "model_id": "90cbd5f3325241c382399f074784a401" } }, "metadata": {} }, { "output_type": "execute_result", "data": { "text/plain": [ " chapter \\\n", "0 DANS LAQUELLE IL EST ÉTABLI QUE, MALGRÉ LEURS ... \n", "1 LES TROIS PRÉSENTS DE M. D’ARTAGNAN PÈRE \n", "2 L’ANTICHAMBRE DE M. DE TRÉVILLE \n", "3 L’AUDIENCE \n", "4 L’ÉPAULE D’ATHOS, LE BAUDRIER DE PORTHOS, \n", ".. ... \n", "63 L’HOMME AU MANTEAU ROUGE \n", "64 JUGEMENT \n", "65 L’EXÉCUTION \n", "66 CONCLUSION \n", "67 ÉPILOGUE \n", "\n", " text \n", "0 Il y a un an à peu près qu’en faisant à la Bib... \n", "1 Le premier lundi du mois d’avril 1625, le bour... \n", "2 M. de Troisville, comme s’appelait encore sa f... \n", "3 M. de Tréville était pour le moment de fort mé... \n", "4 D’Artagnan, furieux, avait traversé l’anticham... \n", ".. ... \n", "63 Le désespoir d’Athos avait fait place à une do... \n", "64 C’était une nuit orageuse et sombre, de gros n... \n", "65 Il était minuit à peu près; la lune, échancrée... \n", "66 Le 6 du mois suivant, le roi, tenant la promes... \n", "67 La Rochelle, privée du secours de la flotte an... \n", "\n", "[68 rows x 2 columns]" ], "text/html": [ "\n", "
\n", " | chapter | \n", "text | \n", "
---|---|---|
0 | \n", "DANS LAQUELLE IL EST ÉTABLI QUE, MALGRÉ LEURS ... | \n", "Il y a un an à peu près qu’en faisant à la Bib... | \n", "
1 | \n", "LES TROIS PRÉSENTS DE M. D’ARTAGNAN PÈRE | \n", "Le premier lundi du mois d’avril 1625, le bour... | \n", "
2 | \n", "L’ANTICHAMBRE DE M. DE TRÉVILLE | \n", "M. de Troisville, comme s’appelait encore sa f... | \n", "
3 | \n", "L’AUDIENCE | \n", "M. de Tréville était pour le moment de fort mé... | \n", "
4 | \n", "L’ÉPAULE D’ATHOS, LE BAUDRIER DE PORTHOS, | \n", "D’Artagnan, furieux, avait traversé l’anticham... | \n", "
... | \n", "... | \n", "... | \n", "
63 | \n", "L’HOMME AU MANTEAU ROUGE | \n", "Le désespoir d’Athos avait fait place à une do... | \n", "
64 | \n", "JUGEMENT | \n", "C’était une nuit orageuse et sombre, de gros n... | \n", "
65 | \n", "L’EXÉCUTION | \n", "Il était minuit à peu près; la lune, échancrée... | \n", "
66 | \n", "CONCLUSION | \n", "Le 6 du mois suivant, le roi, tenant la promes... | \n", "
67 | \n", "ÉPILOGUE | \n", "La Rochelle, privée du secours de la flotte an... | \n", "
68 rows × 2 columns
\n", "