{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# SQLITE Tutorial\n",
    " \n",
    "\n",
    "This tutorial shows you how query the existing `opacity.db` and also shows you how to customize your own `opacity.db`\n",
    "\n",
    "A lot of this code is embedded in PICASO."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sqlite3\n",
    "import io\n",
    "import numpy as np\n",
    "import os"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Establishing a Connection to a Database"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#this is where your opacity file should be located if you've set your environments correctly\n",
    "db_filename = os.path.join(os.getenv('picaso_refdata'), 'opacities','opacities.db')\n",
    "\n",
    "#these functions are so that you can store your float arrays as bytes to minimize storage\n",
    "def adapt_array(arr):\n",
    "    out = io.BytesIO()\n",
    "    np.save(out, arr)\n",
    "    out.seek(0)\n",
    "    return sqlite3.Binary(out.read())\n",
    "\n",
    "def convert_array(text):\n",
    "    out = io.BytesIO(text)\n",
    "    out.seek(0)\n",
    "    return np.load(out)\n",
    "\n",
    "#tell sqlite what to do with an array\n",
    "sqlite3.register_adapter(np.ndarray, adapt_array)\n",
    "sqlite3.register_converter(\"array\", convert_array)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#this will be how we execute commands to grab chunks of data\n",
    "#this is how you establish a connection to the db\n",
    "conn = sqlite3.connect(db_filename, detect_types=sqlite3.PARSE_DECLTYPES)\n",
    "cur = conn.cursor()\n",
    "\n",
    "#usually you want to close your database right after you open it using \n",
    "#conn.close()\n",
    "\n",
    "#for now, we will keep it open for the tutorial"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Using `SELECT * FROM` to Query Items\n",
    "\n",
    "### Get from header"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#let's start by just grabbing all the info from the header\n",
    "header = cur.execute('SELECT * FROM header')\n",
    "cols = [description[0] for description in header.description]\n",
    "data = cur.fetchall()\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Get Continuum Opacity"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#what molecules exist? \n",
    "cur.execute('SELECT molecule FROM continuum')\n",
    "print(np.unique(cur.fetchall()))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#what temperatures exist?\n",
    "cur.execute('SELECT temperature FROM continuum')\n",
    "cia_temperatures = np.unique(cur.fetchall())\n",
    "cia_temperatures[0:10]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#wavenumber grid from header \n",
    "cur.execute('SELECT wavenumber_grid FROM header')\n",
    "wave_grid = cur.fetchone()[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#grab H2H2 at 300 K \n",
    "cur.execute('SELECT opacity FROM continuum WHERE molecule=? AND temperature=?',('H2H2',300))\n",
    "data = cur.fetchall()\n",
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#grab all opacity at 300 K \n",
    "cur.execute('SELECT molecule,opacity FROM continuum WHERE temperature=300')\n",
    "data = cur.fetchall()\n",
    "data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Get Molecular Opacity\n",
    "\n",
    "Molecular opacities are on a specific P-T grid so we book keep them by assigning indices to each pair e.g (1: 1e-6 bar, 75 K, 2:1e-6, 80K.. and so on) "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#get the PT grid with the corresponding grid \n",
    "cur.execute('SELECT ptid, pressure, temperature FROM molecular')\n",
    "data= cur.fetchall()\n",
    "pt_pairs = sorted(list(set(data)),key=lambda x: (x[0]) )\n",
    "pt_pairs[0:10]#example of the first 10 PT pairs "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#what molecules exist? \n",
    "cur.execute('SELECT molecule FROM molecular')\n",
    "print(np.unique(cur.fetchall()))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# grab the opacity at a specific temp and pressure \n",
    "grab_p = 0.1 # bar \n",
    "grab_t = 100 # kelvin \n",
    "import math\n",
    "\n",
    "#here's a little code to get out the correct pair (so we dont have to worry about getting the exact number right)\n",
    "ind_pt = [min(pt_pairs, key=lambda c: math.hypot(c[1]- coordinate[0], c[2]-coordinate[1]))[0] \n",
    "          for coordinate in  zip([grab_p],[grab_t])]\n",
    "\n",
    "cur.execute(\"\"\"SELECT molecule,ptid,opacity \n",
    "            FROM molecular \n",
    "            WHERE molecule = ?\n",
    "            AND ptid = ?\"\"\",('H2O',ind_pt[0]))\n",
    "data= cur.fetchall()\n",
    "data #gives you the molecule, ptid, and the opacity"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "grab_moles = ['H2O','CO2']\n",
    "grab_p = [0.1,1,100] # bar \n",
    "grab_t = [100,200,700] # kelvin \n",
    "\n",
    "#here's a little code to get out the correct pair (so we dont have to worry about getting the exact number right)\n",
    "ind_pt = [min(pt_pairs, key=lambda c: math.hypot(c[1]- coordinate[0], c[2]-coordinate[1]))[0] \n",
    "          for coordinate in  zip(grab_p,grab_t)]\n",
    "\n",
    "cur.execute(\"\"\"SELECT molecule,ptid,opacity \n",
    "            FROM molecular \n",
    "            WHERE molecule in {}\n",
    "            AND ptid in {}\"\"\".format(str(tuple(grab_moles)), str(tuple(ind_pt))))\n",
    "data= cur.fetchall()\n",
    "data #gives you the molecule, ptid, and the opacity"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Dont forget to close your connection!!!!\n",
    "conn.close()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Creating a New Database from Scratch\n",
    "\n",
    "**Note on molecule names**: Because ``picaso`` uses dict formatting to handle opacities, users can easily swap in different molecules. \n",
    "\n",
    "For example, if I wanted to include CO2-H2 CIA absorption, I can add ``CO2H2`` to the molecules list below. However, it is only quasi-automated in this regaurd. Please contact natasha.e.batalha@gmail.com if you are adding new CIA to the code. \n",
    "\n",
    "**Exceptions**: The exceptions to this are non-CIA continuum opacities. Right now, the other sources of continuum enabled are ``H2-``, ``H-bf`` and ``H-ff`` which have odd-ball formatting since they aren't simple two molecules. _Please let me know if you want to see another continuum source added_.\n",
    "\n",
    "**Careful** with case sensitive molecules like **TiO**, **Na**. Make sure you get these right. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "db_filename = '/data/picaso_dbs/new_fake_opacity.db'\n",
    "conn = sqlite3.connect(db_filename, detect_types=sqlite3.PARSE_DECLTYPES)\n",
    "#same story with bytes and arrays\n",
    "def adapt_array(arr):\n",
    "    out = io.BytesIO()\n",
    "    np.save(out, arr)\n",
    "    out.seek(0)\n",
    "    return sqlite3.Binary(out.read())\n",
    "\n",
    "def convert_array(text):\n",
    "    out = io.BytesIO(text)\n",
    "    out.seek(0)\n",
    "    return np.load(out)\n",
    "\n",
    "#tell sqlite what to do with an array\n",
    "sqlite3.register_adapter(np.ndarray, adapt_array)\n",
    "sqlite3.register_converter(\"array\", convert_array)\n",
    "\n",
    "cur = conn.cursor()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Build Tables: header, continuum, and molecular tables\n",
    "\n",
    "It is **VERY** important that these tables are structured the same way. If you think something should be edited, ping natasha.e.batalha@gmail.com"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#header\n",
    "command=\"\"\"DROP TABLE IF EXISTS header;\n",
    "CREATE TABLE header (\n",
    "    id INTEGER PRIMARY KEY,\n",
    "    pressure_unit VARCHAR,\n",
    "    temperature_unit VARCHAR,\n",
    "    wavenumber_grid array,\n",
    "    continuum_unit VARCHAR,\n",
    "    molecular_unit VARCHAR\n",
    "    );\"\"\"\n",
    "\n",
    "cur.executescript(command)\n",
    "conn.commit()\n",
    "#molecular data table, note the existence of PTID which will be very important\n",
    "command = \"\"\"DROP TABLE IF EXISTS molecular;\n",
    "CREATE TABLE molecular (\n",
    "    id INTEGER PRIMARY KEY,\n",
    "    ptid INTEGER,\n",
    "    molecule VARCHAR ,\n",
    "    pressure FLOAT,\n",
    "    temperature FLOAT,\n",
    "    opacity array);\"\"\"\n",
    "\n",
    "cur.executescript(command)\n",
    "conn.commit()\n",
    "#continuum data table\n",
    "command = \"\"\"DROP TABLE IF EXISTS continuum;\n",
    "CREATE TABLE continuum (\n",
    "    id INTEGER PRIMARY KEY,\n",
    "    molecule VARCHAR ,\n",
    "    temperature FLOAT,\n",
    "    opacity array);\"\"\"\n",
    "\n",
    "cur.executescript(command)\n",
    "conn.commit() #this commits the changes to the database"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Insert header info (unit and wave grid info!)\n",
    "\n",
    "The units **MUST** be the same. The wave grid can be whatever as long as it's consistent between continuum and molecular tables."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "wave_grid = np.linspace(1e4/2, 1e4/0.5, 1000) #fake inverse cm wavenumber grid\n",
    "\n",
    "cur.execute('INSERT INTO header (pressure_unit, temperature_unit, wavenumber_grid, continuum_unit,molecular_unit) values (?,?,?,?,?)', \n",
    "            ('bar','kelvin', np.array(wave_grid), 'cm-1 amagat-2', 'cm2/molecule'))\n",
    "conn.commit()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Insert continuum opacity to database"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "cia_temperature_grid = [100,300,500,700]\n",
    "#insert continuum\n",
    "for mol in ['H2H2', 'H2He', 'H2H', 'H2CH4', 'H2N2','H2-', 'H-bf', 'H-ff']:\n",
    "    for T in cia_temperature_grid:\n",
    "        OPACITY = wave_grid *0 + 1e-33 #INSERT YOUR OPACITY HERE\n",
    "        cur.execute('INSERT INTO continuum (molecule, temperature, opacity) values (?,?,?)', (mol,float(T), OPACITY))\n",
    "        conn.commit()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Insert molecular opacity to database\n",
    "\n",
    "Again, make sure that your molecules are **case-sensitive**: e.g. Sodium should be `Na` not `NA`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#create a fake PT grid \n",
    "pts=[]\n",
    "for temp in [100,200,400]:\n",
    "    for pres in [0.1, 1, 100]:\n",
    "        pts += [[temp,pres]]\n",
    "pts"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#insert molecular\n",
    "for mol in ['H2O','CO2','CH4']:\n",
    "    i = 1 #NOTE THIS INDEX HERE IS CRUCIAL! It will be how we quickly locate opacities \n",
    "    for T,P in pts:\n",
    "        OPACITY = wave_grid *0 + 1e-33 #INSERT YOUR OPACITY HERE\n",
    "        cur.execute('INSERT INTO molecular (ptid, molecule, temperature, pressure,opacity) values (?,?,?,?,?)', (i,mol,float(T),float(P), OPACITY))\n",
    "        conn.commit()\n",
    "        i+=1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#ALL DONE!!!\n",
    "conn.close()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.5"
  },
  "varInspector": {
   "cols": {
    "lenName": 16,
    "lenType": 16,
    "lenVar": 40
   },
   "kernels_config": {
    "python": {
     "delete_cmd_postfix": "",
     "delete_cmd_prefix": "del ",
     "library": "var_list.py",
     "varRefreshCmd": "print(var_dic_list())"
    },
    "r": {
     "delete_cmd_postfix": ") ",
     "delete_cmd_prefix": "rm(",
     "library": "var_list.r",
     "varRefreshCmd": "cat(var_dic_list()) "
    }
   },
   "types_to_exclude": [
    "module",
    "function",
    "builtin_function_or_method",
    "instance",
    "_Feature"
   ],
   "window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}