{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "ffd95d87",
   "metadata": {},
   "source": [
    "# DE1 — Final Project Notebook\n",
    "> Author : Badr TAJINI - Data Engineering I - ESIEE 2025-2026\n",
    "---\n",
    "\n",
    "This is the primary executable artifact. Fill config, run baseline, then optimized pipeline, and record evidence."
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e9d57643",
   "metadata": {},
   "source": [
    "## 0. Load config"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "573be2a4",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'paths': {'raw_csv_glob': '/home/saraa/lab_assignment/projet/raw/en.openfoodfacts.org.products.csv',\n",
       "  'bronze': '/home/saraa/lab_assignment/outputs/projet/bronze',\n",
       "  'silver': '/home/saraa/lab_assignment/outputs/projet/silver',\n",
       "  'gold': '/home/saraa/lab_assignment/outputs/projet/gold',\n",
       "  'proof': '/home/saraa/lab_assignment/outputs/projet/proof'},\n",
       " 'layout': {'partition_by': ['countries_tags']},\n",
       " 'slo': {'freshness_hours': 2,\n",
       "  'q1_latency_seconds': 4,\n",
       "  'storage_reduction_ratio': 0.6},\n",
       " 'queries': {'q1': 'daily_product_count',\n",
       "  'q2': 'top_brands',\n",
       "  'q3': 'nutriscore_distribution'}}"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import yaml, pathlib, datetime\n",
    "from pyspark.sql import SparkSession, functions as F, types as T\n",
    "\n",
    "with open(\"/home/saraa/lab_assignment/projet/de1_project_config.yml\") as f:\n",
    "    CFG = yaml.safe_load(f)\n",
    "\n",
    "spark = SparkSession.builder.appName(\"de1-project\").getOrCreate()\n",
    "CFG\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "19bb69cc",
   "metadata": {},
   "source": [
    "## 1. Bronze — landing raw data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "8e5659e0",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "                                                                                "
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Bronze written: /home/saraa/lab_assignment/outputs/projet/bronze\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[Stage 7:=====================================================>   (85 + 6) / 91]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Raw count: 4239250\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "                                                                                "
     ]
    }
   ],
   "source": [
    "raw_glob = CFG[\"paths\"][\"raw_csv_glob\"]\n",
    "bronze = CFG[\"paths\"][\"bronze\"]\n",
    "proof = CFG[\"paths\"][\"proof\"]\n",
    "df_raw = ( \n",
    "    spark.read \n",
    "        .option(\"header\", \"true\") \n",
    "        .option(\"sep\", \"\\t\") # très important : OpenFoodFacts = tabulation \n",
    "        .csv(raw_glob) \n",
    ")\n",
    "\n",
    "df_raw.write.mode(\"overwrite\").csv(bronze)  # keep raw as CSV copy\n",
    "print(\"Bronze written:\", bronze)\n",
    "print(\"Raw count:\", df_raw.count())"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "04c88c8f",
   "metadata": {},
   "source": [
    "## 2. Silver — cleaning and typing"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "31feb449",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "25/12/26 13:10:22 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 69.09% for 11 writers\n",
      "25/12/26 13:10:22 WARN BasicWriteTaskStatsTracker: Expected 1 files, but only saw 0. This could be due to the output format not writing empty files, or files being not immediately visible in the filesystem.\n",
      "25/12/26 13:10:23 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 63.33% for 12 writers\n",
      "25/12/26 13:10:25 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 69.09% for 11 writers\n",
      "25/12/26 13:10:25 WARN BasicWriteTaskStatsTracker: Expected 1 files, but only saw 0. This could be due to the output format not writing empty files, or files being not immediately visible in the filesystem.\n",
      "25/12/26 13:10:25 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 63.33% for 12 writers\n",
      "25/12/26 13:10:25 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 69.09% for 11 writers\n",
      "25/12/26 13:10:25 WARN BasicWriteTaskStatsTracker: Expected 1 files, but only saw 0. This could be due to the output format not writing empty files, or files being not immediately visible in the filesystem.\n",
      "25/12/26 13:10:26 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 63.33% for 12 writers\n",
      "25/12/26 13:10:26 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 69.09% for 11 writers\n",
      "25/12/26 13:10:26 WARN BasicWriteTaskStatsTracker: Expected 1 files, but only saw 0. This could be due to the output format not writing empty files, or files being not immediately visible in the filesystem.\n",
      "25/12/26 13:10:26 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 63.33% for 12 writers\n",
      "25/12/26 13:10:27 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 69.09% for 11 writers\n",
      "25/12/26 13:10:27 WARN BasicWriteTaskStatsTracker: Expected 1 files, but only saw 0. This could be due to the output format not writing empty files, or files being not immediately visible in the filesystem.\n",
      "25/12/26 13:10:27 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 76.00% for 10 writers\n",
      "25/12/26 13:10:27 WARN BasicWriteTaskStatsTracker: Expected 1 files, but only saw 0. This could be due to the output format not writing empty files, or files being not immediately visible in the filesystem.\n",
      "25/12/26 13:10:27 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 69.09% for 11 writers\n",
      "25/12/26 13:10:28 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 63.33% for 12 writers\n",
      "25/12/26 13:10:29 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 69.09% for 11 writers\n",
      "25/12/26 13:10:29 WARN BasicWriteTaskStatsTracker: Expected 1 files, but only saw 0. This could be due to the output format not writing empty files, or files being not immediately visible in the filesystem.\n",
      "25/12/26 13:10:29 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 63.33% for 12 writers\n",
      "25/12/26 13:10:42 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 69.09% for 11 writers\n",
      "25/12/26 13:10:42 WARN BasicWriteTaskStatsTracker: Expected 1 files, but only saw 0. This could be due to the output format not writing empty files, or files being not immediately visible in the filesystem.\n",
      "25/12/26 13:10:42 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 76.00% for 10 writers\n",
      "25/12/26 13:10:42 WARN BasicWriteTaskStatsTracker: Expected 1 files, but only saw 0. This could be due to the output format not writing empty files, or files being not immediately visible in the filesystem.\n",
      "25/12/26 13:10:42 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 84.44% for 9 writers\n",
      "25/12/26 13:10:42 WARN BasicWriteTaskStatsTracker: Expected 1 files, but only saw 0. This could be due to the output format not writing empty files, or files being not immediately visible in the filesystem.\n",
      "25/12/26 13:10:42 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 76.00% for 10 writers\n",
      "25/12/26 13:10:42 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 69.09% for 11 writers\n",
      "25/12/26 13:10:42 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 63.33% for 12 writers\n",
      "25/12/26 13:10:44 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 69.09% for 11 writers\n",
      "25/12/26 13:10:44 WARN BasicWriteTaskStatsTracker: Expected 1 files, but only saw 0. This could be due to the output format not writing empty files, or files being not immediately visible in the filesystem.\n",
      "25/12/26 13:10:44 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 63.33% for 12 writers\n",
      "25/12/26 13:10:45 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 69.09% for 11 writers\n",
      "25/12/26 13:10:45 WARN BasicWriteTaskStatsTracker: Expected 1 files, but only saw 0. This could be due to the output format not writing empty files, or files being not immediately visible in the filesystem.\n",
      "25/12/26 13:10:45 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 76.00% for 10 writers\n",
      "25/12/26 13:10:45 WARN BasicWriteTaskStatsTracker: Expected 1 files, but only saw 0. This could be due to the output format not writing empty files, or files being not immediately visible in the filesystem.\n",
      "25/12/26 13:10:45 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 69.09% for 11 writers\n",
      "25/12/26 13:10:45 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 63.33% for 12 writers\n",
      "25/12/26 13:10:45 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 69.09% for 11 writers\n",
      "25/12/26 13:10:46 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 63.33% for 12 writers\n",
      "25/12/26 13:10:46 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 69.09% for 11 writers\n",
      "25/12/26 13:10:46 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 63.33% for 12 writers\n",
      "25/12/26 13:10:48 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 69.09% for 11 writers\n",
      "25/12/26 13:10:48 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 76.00% for 10 writers\n",
      "25/12/26 13:10:48 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 69.09% for 11 writers\n",
      "25/12/26 13:10:48 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 63.33% for 12 writers\n",
      "25/12/26 13:10:49 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 69.09% for 11 writers\n",
      "25/12/26 13:10:49 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 63.33% for 12 writers\n",
      "25/12/26 13:10:49 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 69.09% for 11 writers\n",
      "25/12/26 13:10:50 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 63.33% for 12 writers\n",
      "25/12/26 13:10:50 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 69.09% for 11 writers\n",
      "25/12/26 13:10:50 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 63.33% for 12 writers\n",
      "25/12/26 13:11:01 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 69.09% for 11 writers\n",
      "25/12/26 13:11:01 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 63.33% for 12 writers\n",
      "25/12/26 13:11:02 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 69.09% for 11 writers\n",
      "25/12/26 13:11:02 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 63.33% for 12 writers\n",
      "25/12/26 13:11:03 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 69.09% for 11 writers\n",
      "25/12/26 13:11:03 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 76.00% for 10 writers\n",
      "25/12/26 13:11:03 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 69.09% for 11 writers\n",
      "25/12/26 13:11:03 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 63.33% for 12 writers\n",
      "25/12/26 13:11:04 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 69.09% for 11 writers\n",
      "25/12/26 13:11:05 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 63.33% for 12 writers\n",
      "25/12/26 13:11:06 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 69.09% for 11 writers\n",
      "25/12/26 13:11:07 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 76.00% for 10 writers\n",
      "25/12/26 13:11:07 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 69.09% for 11 writers\n",
      "25/12/26 13:11:07 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 63.33% for 12 writers\n",
      "25/12/26 13:11:09 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 69.09% for 11 writers\n",
      "25/12/26 13:11:09 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 76.00% for 10 writers\n",
      "25/12/26 13:11:09 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 84.44% for 9 writers\n",
      "25/12/26 13:11:09 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 76.00% for 10 writers\n",
      "25/12/26 13:11:09 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 69.09% for 11 writers\n",
      "25/12/26 13:11:09 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 63.33% for 12 writers\n",
      "25/12/26 13:11:10 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 69.09% for 11 writers\n",
      "25/12/26 13:11:10 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 63.33% for 12 writers\n",
      "25/12/26 13:11:12 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 69.09% for 11 writers\n",
      "25/12/26 13:11:12 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 63.33% for 12 writers\n",
      "25/12/26 13:11:21 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 69.09% for 11 writers\n",
      "25/12/26 13:11:22 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 63.33% for 12 writers\n",
      "25/12/26 13:11:23 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 69.09% for 11 writers\n",
      "25/12/26 13:11:23 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 63.33% for 12 writers\n",
      "25/12/26 13:11:24 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 69.09% for 11 writers\n",
      "25/12/26 13:11:24 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 76.00% for 10 writers\n",
      "25/12/26 13:11:25 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 69.09% for 11 writers\n",
      "25/12/26 13:11:25 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 76.00% for 10 writers\n",
      "25/12/26 13:11:25 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 69.09% for 11 writers\n",
      "25/12/26 13:11:25 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 76.00% for 10 writers\n",
      "25/12/26 13:11:25 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 69.09% for 11 writers\n",
      "25/12/26 13:11:25 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 63.33% for 12 writers\n",
      "25/12/26 13:11:25 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 69.09% for 11 writers\n",
      "25/12/26 13:11:26 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 63.33% for 12 writers\n",
      "25/12/26 13:11:28 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 69.09% for 11 writers\n",
      "25/12/26 13:11:28 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 63.33% for 12 writers\n",
      "25/12/26 13:11:29 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 69.09% for 11 writers\n",
      "25/12/26 13:11:29 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 63.33% for 12 writers\n",
      "25/12/26 13:11:30 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 69.09% for 11 writers\n",
      "25/12/26 13:11:31 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 63.33% for 12 writers\n",
      "25/12/26 13:11:31 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 69.09% for 11 writers\n",
      "25/12/26 13:11:32 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 63.33% for 12 writers\n",
      "25/12/26 13:11:32 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 69.09% for 11 writers\n",
      "25/12/26 13:11:33 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 63.33% for 12 writers\n",
      "25/12/26 13:11:42 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 69.09% for 11 writers\n",
      "25/12/26 13:11:43 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 63.33% for 12 writers\n",
      "25/12/26 13:11:43 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 69.09% for 11 writers\n",
      "25/12/26 13:11:44 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 63.33% for 12 writers\n",
      "25/12/26 13:11:45 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 69.09% for 11 writers\n",
      "25/12/26 13:11:45 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 76.00% for 10 writers\n",
      "25/12/26 13:11:45 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 69.09% for 11 writers\n",
      "25/12/26 13:11:45 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 76.00% for 10 writers\n",
      "25/12/26 13:11:45 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 69.09% for 11 writers\n",
      "25/12/26 13:11:45 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 63.33% for 12 writers\n",
      "25/12/26 13:11:46 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 69.09% for 11 writers\n",
      "25/12/26 13:11:46 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 63.33% for 12 writers\n",
      "25/12/26 13:11:46 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 69.09% for 11 writers\n",
      "25/12/26 13:11:47 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 63.33% for 12 writers\n",
      "25/12/26 13:11:48 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 69.09% for 11 writers\n",
      "25/12/26 13:11:49 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 63.33% for 12 writers\n",
      "25/12/26 13:11:50 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 69.09% for 11 writers\n",
      "25/12/26 13:11:50 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 63.33% for 12 writers\n",
      "25/12/26 13:11:51 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 69.09% for 11 writers\n",
      "25/12/26 13:11:51 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 63.33% for 12 writers\n",
      "25/12/26 13:11:51 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 69.09% for 11 writers\n",
      "25/12/26 13:11:52 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 63.33% for 12 writers\n",
      "25/12/26 13:11:52 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 69.09% for 11 writers\n",
      "25/12/26 13:11:53 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 63.33% for 12 writers\n",
      "25/12/26 13:12:02 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 69.09% for 11 writers\n",
      "25/12/26 13:12:02 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 63.33% for 12 writers\n",
      "25/12/26 13:12:03 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 69.09% for 11 writers\n",
      "25/12/26 13:12:03 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 63.33% for 12 writers\n",
      "25/12/26 13:12:05 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 69.09% for 11 writers\n",
      "25/12/26 13:12:05 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 63.33% for 12 writers\n",
      "25/12/26 13:12:06 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 69.09% for 11 writers\n",
      "25/12/26 13:12:06 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 76.00% for 10 writers\n",
      "25/12/26 13:12:06 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 84.44% for 9 writers\n",
      "25/12/26 13:12:07 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 76.00% for 10 writers\n",
      "25/12/26 13:12:07 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 69.09% for 11 writers\n",
      "25/12/26 13:12:07 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 63.33% for 12 writers\n",
      "25/12/26 13:12:09 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 69.09% for 11 writers\n",
      "25/12/26 13:12:09 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 63.33% for 12 writers\n",
      "25/12/26 13:12:09 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 69.09% for 11 writers\n",
      "25/12/26 13:12:10 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 63.33% for 12 writers\n",
      "25/12/26 13:12:12 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 69.09% for 11 writers\n",
      "25/12/26 13:12:12 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 63.33% for 12 writers\n",
      "25/12/26 13:12:12 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 69.09% for 11 writers\n",
      "25/12/26 13:12:13 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 76.00% for 10 writers\n",
      "25/12/26 13:12:13 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 69.09% for 11 writers\n",
      "25/12/26 13:12:13 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 76.00% for 10 writers\n",
      "25/12/26 13:12:13 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 69.09% for 11 writers\n",
      "25/12/26 13:12:13 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 63.33% for 12 writers\n",
      "25/12/26 13:12:21 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 69.09% for 11 writers\n",
      "25/12/26 13:12:22 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 63.33% for 12 writers\n",
      "25/12/26 13:12:22 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 69.09% for 11 writers\n",
      "25/12/26 13:12:22 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 63.33% for 12 writers\n",
      "25/12/26 13:12:24 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 69.09% for 11 writers\n",
      "25/12/26 13:12:24 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 63.33% for 12 writers\n",
      "25/12/26 13:12:27 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 69.09% for 11 writers\n",
      "25/12/26 13:12:27 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 76.00% for 10 writers\n",
      "25/12/26 13:12:27 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 69.09% for 11 writers\n",
      "25/12/26 13:12:27 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 63.33% for 12 writers\n",
      "25/12/26 13:12:29 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 69.09% for 11 writers\n",
      "25/12/26 13:12:29 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 76.00% for 10 writers\n",
      "25/12/26 13:12:30 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 69.09% for 11 writers\n",
      "25/12/26 13:12:30 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 63.33% for 12 writers\n",
      "25/12/26 13:12:31 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 69.09% for 11 writers\n",
      "25/12/26 13:12:31 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 63.33% for 12 writers\n",
      "25/12/26 13:12:35 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 69.09% for 11 writers\n",
      "25/12/26 13:12:35 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 76.00% for 10 writers\n",
      "25/12/26 13:12:35 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 84.44% for 9 writers\n",
      "25/12/26 13:12:35 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 95.00% for 8 writers\n",
      "25/12/26 13:12:35 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 84.44% for 9 writers\n",
      "25/12/26 13:12:35 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 76.00% for 10 writers\n",
      "25/12/26 13:12:35 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 69.09% for 11 writers\n",
      "25/12/26 13:12:35 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 63.33% for 12 writers\n",
      "25/12/26 13:12:43 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 69.09% for 11 writers\n",
      "25/12/26 13:12:43 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 76.00% for 10 writers\n",
      "25/12/26 13:12:44 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 69.09% for 11 writers\n",
      "25/12/26 13:12:44 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 63.33% for 12 writers\n",
      "25/12/26 13:12:44 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 69.09% for 11 writers\n",
      "25/12/26 13:12:45 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 63.33% for 12 writers\n",
      "25/12/26 13:12:46 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 69.09% for 11 writers\n",
      "25/12/26 13:12:46 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 63.33% for 12 writers\n",
      "25/12/26 13:12:48 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 69.09% for 11 writers\n",
      "25/12/26 13:12:49 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 63.33% for 12 writers\n",
      "25/12/26 13:12:50 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 69.09% for 11 writers\n",
      "25/12/26 13:12:50 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 63.33% for 12 writers\n",
      "25/12/26 13:12:51 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 69.09% for 11 writers\n",
      "25/12/26 13:12:51 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 63.33% for 12 writers\n",
      "25/12/26 13:12:52 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 69.09% for 11 writers\n",
      "25/12/26 13:12:53 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 63.33% for 12 writers\n",
      "25/12/26 13:12:54 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 69.09% for 11 writers\n",
      "25/12/26 13:12:55 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 63.33% for 12 writers\n",
      "25/12/26 13:12:55 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 69.09% for 11 writers\n",
      "25/12/26 13:12:55 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 76.00% for 10 writers\n",
      "25/12/26 13:12:55 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 69.09% for 11 writers\n",
      "25/12/26 13:12:56 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 63.33% for 12 writers\n",
      "25/12/26 13:12:56 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 69.09% for 11 writers\n",
      "25/12/26 13:12:56 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 63.33% for 12 writers\n",
      "25/12/26 13:13:03 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 69.09% for 11 writers\n",
      "25/12/26 13:13:04 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 63.33% for 12 writers\n",
      "25/12/26 13:13:04 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 69.09% for 11 writers\n",
      "25/12/26 13:13:05 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 63.33% for 12 writers\n",
      "25/12/26 13:13:05 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 69.09% for 11 writers\n",
      "25/12/26 13:13:05 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 63.33% for 12 writers\n",
      "25/12/26 13:13:06 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 69.09% for 11 writers\n",
      "25/12/26 13:13:06 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 63.33% for 12 writers\n",
      "25/12/26 13:13:09 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 69.09% for 11 writers\n",
      "25/12/26 13:13:09 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 63.33% for 12 writers\n",
      "25/12/26 13:13:09 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 69.09% for 11 writers\n",
      "25/12/26 13:13:10 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 63.33% for 12 writers\n",
      "25/12/26 13:13:11 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 69.09% for 11 writers\n",
      "25/12/26 13:13:11 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 63.33% for 12 writers\n",
      "25/12/26 13:13:12 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 69.09% for 11 writers\n",
      "25/12/26 13:13:12 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 63.33% for 12 writers\n",
      "25/12/26 13:13:14 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 69.09% for 11 writers\n",
      "25/12/26 13:13:15 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 76.00% for 10 writers\n",
      "25/12/26 13:13:15 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 84.44% for 9 writers\n",
      "25/12/26 13:13:15 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 76.00% for 10 writers\n",
      "25/12/26 13:13:15 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 69.09% for 11 writers\n",
      "25/12/26 13:13:15 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 76.00% for 10 writers\n",
      "25/12/26 13:13:15 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 69.09% for 11 writers\n",
      "25/12/26 13:13:15 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 63.33% for 12 writers\n",
      "25/12/26 13:13:23 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 69.09% for 11 writers\n",
      "25/12/26 13:13:24 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 63.33% for 12 writers\n",
      "25/12/26 13:13:24 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 69.09% for 11 writers\n",
      "25/12/26 13:13:24 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 76.00% for 10 writers\n",
      "25/12/26 13:13:25 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 84.44% for 9 writers\n",
      "25/12/26 13:13:27 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory\n",
      "Scaling row group sizes to 95.00% for 8 writers\n",
      "                                                                                "
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Silver written: /home/saraa/lab_assignment/outputs/projet/silver\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[Stage 13:======================================================> (88 + 3) / 91]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Silver count: 3909666\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "                                                                                "
     ]
    }
   ],
   "source": [
    "silver = CFG[\"paths\"][\"silver\"]\n",
    "\n",
    "from pyspark.sql import functions as F, types as T\n",
    "\n",
    "df_silver = (\n",
    "    df_raw\n",
    "        # Dates UNIX : timestamp\n",
    "        .withColumn(\"created_t\", F.from_unixtime(F.col(\"created_t\").cast(\"long\")).cast(\"timestamp\"))\n",
    "        .withColumn(\"last_modified_t\", F.from_unixtime(F.col(\"last_modified_t\").cast(\"long\")).cast(\"timestamp\"))\n",
    "\n",
    "        # Nutrition (cast en double si présent)\n",
    "        .withColumn(\"energy_100g\", F.col(\"energy_100g\").cast(\"double\"))\n",
    "        .withColumn(\"fat_100g\", F.col(\"fat_100g\").cast(\"double\"))\n",
    "        .withColumn(\"sugars_100g\", F.col(\"sugars_100g\").cast(\"double\"))\n",
    "        .withColumn(\"proteins_100g\", F.col(\"proteins_100g\").cast(\"double\"))\n",
    "\n",
    "        # On enlève les lignes sans nom de produit\n",
    "        .dropna(subset=[\"product_name\"])\n",
    ")\n",
    "\n",
    "df_silver.write.mode(\"overwrite\").parquet(silver)\n",
    "\n",
    "print(\"Silver written:\", silver)\n",
    "print(\"Silver count:\", df_silver.count())"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "806c38a4",
   "metadata": {},
   "source": [
    "## 3. Gold — analytics tables"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "fb781895",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[Stage 16:======================================================> (88 + 3) / 91]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Q1 written: /home/saraa/lab_assignment/outputs/projet/gold/q1_daily\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "                                                                                "
     ]
    }
   ],
   "source": [
    "gold = CFG[\"paths\"][\"gold\"]\n",
    "partition_by = CFG[\"layout\"][\"partition_by\"]\n",
    "\n",
    "# Q1 : nombre de produits par jour\n",
    "gold_q1 = (\n",
    "    df_silver\n",
    "        .withColumn(\"day\", F.to_date(\"created_t\"))\n",
    "        .groupBy(\"day\")\n",
    "        .agg(F.count(\"*\").alias(\"product_count\"))\n",
    ")\n",
    "\n",
    "gold_q1.write.mode(\"overwrite\").parquet(f\"{gold}/q1_daily\")\n",
    "\n",
    "\n",
    "print(\"Q1 written:\", f\"{gold}/q1_daily\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "3c18850e-6a24-46ca-a8bd-438aa3d93884",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[Stage 26:======================================>                   (2 + 1) / 3]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Q2 written: /home/saraa/lab_assignment/outputs/projet/gold/q2_top_brands\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "                                                                                "
     ]
    }
   ],
   "source": [
    "gold_q2 = (\n",
    "    df_silver\n",
    "        .groupBy(\"brands\")\n",
    "        .agg(F.count(\"*\").alias(\"product_count\"))\n",
    "        .orderBy(F.desc(\"product_count\"))\n",
    ")\n",
    "\n",
    "gold_q2.write.mode(\"overwrite\").parquet(f\"{gold}/q2_top_brands\")\n",
    "\n",
    "print(\"Q2 written:\", f\"{gold}/q2_top_brands\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "feb4dd3f-f4eb-477e-8fd3-8d69e35f595a",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "                                                                                "
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Q3 written: /home/saraa/lab_assignment/outputs/projet/gold/q3_nutriscore\n"
     ]
    }
   ],
   "source": [
    "gold_q3 = (\n",
    "    df_silver\n",
    "        .groupBy(\"nutriscore_grade\")\n",
    "        .agg(F.count(\"*\").alias(\"count\"))\n",
    "        .orderBy(\"nutriscore_grade\")\n",
    ")\n",
    "\n",
    "gold_q3.write.mode(\"overwrite\").parquet(f\"{gold}/q3_nutriscore\")\n",
    "\n",
    "print(\"Q3 written:\", f\"{gold}/q3_nutriscore\")\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4ab68dda",
   "metadata": {},
   "source": [
    "## 4. Baseline plans and metrics"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "0e67422b",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Saved baseline plan. Record Spark UI metrics now.\n"
     ]
    }
   ],
   "source": [
    "import os, datetime as _dt, pathlib\n",
    "pathlib.Path(proof).mkdir(parents=True, exist_ok=True)\n",
    "\n",
    "# Example baseline plan\n",
    "plan = gold_q1._jdf.queryExecution().executedPlan().toString()\n",
    "with open(f\"{proof}/baseline_q1_plan.txt\",\"w\") as f:\n",
    "    f.write(str(_dt.datetime.now())+\"\\n\")\n",
    "    f.write(plan)\n",
    "print(\"Saved baseline plan. Record Spark UI metrics now.\")\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "31713933",
   "metadata": {},
   "source": [
    "## 5. Optimization — layout and joins"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "03858b45",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[Stage 35:=======================================================>(90 + 1) / 91]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Saved optimized plan. Record Spark UI metrics now.\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "                                                                                "
     ]
    }
   ],
   "source": [
    "# On réduit le DataFrame aux colonnes utiles\n",
    "df_silver_min = df_silver.select(\"created_t\")\n",
    "\n",
    "# On calcule le nombre de produits par jour\n",
    "gold_q1_opt = (\n",
    "    df_silver_min\n",
    "        .withColumn(\"day\", F.to_date(\"created_t\"))\n",
    "        .groupBy(\"day\")\n",
    "        .agg(F.count(\"*\").alias(\"product_count\"))\n",
    ")\n",
    "\n",
    "# On écrit sans partitionBy (comme pour Q1)\n",
    "gold_q1_opt.write.mode(\"overwrite\").parquet(f\"{gold}/q1_daily_opt\")\n",
    "\n",
    "# On sauvegarde le plan optimisé\n",
    "plan_opt = gold_q1_opt._jdf.queryExecution().executedPlan().toString()\n",
    "with open(f\"{proof}/optimized_q1_plan.txt\",\"w\") as f:\n",
    "    f.write(str(_dt.datetime.now())+\"\\n\")\n",
    "    f.write(plan_opt)\n",
    "\n",
    "print(\"Saved optimized plan. Record Spark UI metrics now.\")\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a43eb07c",
   "metadata": {},
   "source": [
    "## 6. Cleanup"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "d1019938",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Spark session stopped.\n"
     ]
    }
   ],
   "source": [
    "spark.stop()\n",
    "print(\"Spark session stopped.\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e389e35c-f5e3-4967-9a51-6af062f3a0ee",
   "metadata": {},
   "outputs": [],
   "source": [
    "import zipfile\n",
    "import os\n",
    "\n",
    "# Dossier racine de ton projet\n",
    "root = \"/home/saraa/lab_assignment/projet\"\n",
    "\n",
    "# Fichiers à inclure en plus du dossier outputs\n",
    "extra_files = [\n",
    "    \"DE1_Project_Notebook_EN.ipynb\",\n",
    "    \"de1_project_config.yml\"\n",
    "]\n",
    "\n",
    "# Dossier outputs à inclure\n",
    "outputs_folder = \"/home/saraa/lab_assignment/outputs/projet\"\n",
    "\n",
    "# Nom du ZIP final\n",
    "zip_path = \"/home/saraa/lab_assignment/DE1_Sara_Projet_Complet.zip\"\n",
    "\n",
    "with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:\n",
    "\n",
    "    # Ajouter les fichiers extra (notebook + yaml)\n",
    "    for file in extra_files:\n",
    "        full_path = os.path.join(root, file)\n",
    "        if os.path.exists(full_path):\n",
    "            zipf.write(full_path, file)\n",
    "        else:\n",
    "            print(\"Fichier introuvable :\", full_path)\n",
    "\n",
    "    # Ajouter tout le dossier outputs/projet\n",
    "    for root_dir, dirs, files in os.walk(outputs_folder):\n",
    "        for file in files:\n",
    "            full_path = os.path.join(root_dir, file)\n",
    "            arcname = os.path.relpath(full_path, root)\n",
    "            zipf.write(full_path, arcname)\n",
    "\n",
    "print(\"ZIP créé :\", zip_path)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "bfd8cc4d-6020-4c9e-ad25-0058717c46a9",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fb68309e-bd7d-4c4d-b672-df0683f8d83e",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python (de1-env)",
   "language": "python",
   "name": "de1-env"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.18"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
