{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# SQLITE Tutorial\n", " \n", "\n", "This tutorial shows you how query the existing `opacity.db` and also shows you how to customize your own `opacity.db`\n", "\n", "A lot of this code is embedded in PICASO." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import sqlite3\n", "import io\n", "import numpy as np\n", "import os" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Establishing a Connection to a Database" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#this is where your opacity file should be located if you've set your environments correctly\n", "db_filename = os.path.join(os.getenv('picaso_refdata'), 'opacities','opacities.db')\n", "\n", "#these functions are so that you can store your float arrays as bytes to minimize storage\n", "def adapt_array(arr):\n", " out = io.BytesIO()\n", " np.save(out, arr)\n", " out.seek(0)\n", " return sqlite3.Binary(out.read())\n", "\n", "def convert_array(text):\n", " out = io.BytesIO(text)\n", " out.seek(0)\n", " return np.load(out)\n", "\n", "#tell sqlite what to do with an array\n", "sqlite3.register_adapter(np.ndarray, adapt_array)\n", "sqlite3.register_converter(\"array\", convert_array)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#this will be how we execute commands to grab chunks of data\n", "#this is how you establish a connection to the db\n", "conn = sqlite3.connect(db_filename, detect_types=sqlite3.PARSE_DECLTYPES)\n", "cur = conn.cursor()\n", "\n", "#usually you want to close your database right after you open it using \n", "#conn.close()\n", "\n", "#for now, we will keep it open for the tutorial" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Using `SELECT * FROM` to Query Items\n", "\n", "### Get from header" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#let's start by just grabbing all the info from the header\n", "header = cur.execute('SELECT * FROM header')\n", "cols = [description[0] for description in header.description]\n", "data = cur.fetchall()\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Get Continuum Opacity" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#what molecules exist? \n", "cur.execute('SELECT molecule FROM continuum')\n", "print(np.unique(cur.fetchall()))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#what temperatures exist?\n", "cur.execute('SELECT temperature FROM continuum')\n", "cia_temperatures = np.unique(cur.fetchall())\n", "cia_temperatures[0:10]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#wavenumber grid from header \n", "cur.execute('SELECT wavenumber_grid FROM header')\n", "wave_grid = cur.fetchone()[0]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#grab H2H2 at 300 K \n", "cur.execute('SELECT opacity FROM continuum WHERE molecule=? AND temperature=?',('H2H2',300))\n", "data = cur.fetchall()\n", "data" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#grab all opacity at 300 K \n", "cur.execute('SELECT molecule,opacity FROM continuum WHERE temperature=300')\n", "data = cur.fetchall()\n", "data" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Get Molecular Opacity\n", "\n", "Molecular opacities are on a specific P-T grid so we book keep them by assigning indices to each pair e.g (1: 1e-6 bar, 75 K, 2:1e-6, 80K.. and so on) " ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#get the PT grid with the corresponding grid \n", "cur.execute('SELECT ptid, pressure, temperature FROM molecular')\n", "data= cur.fetchall()\n", "pt_pairs = sorted(list(set(data)),key=lambda x: (x[0]) )\n", "pt_pairs[0:10]#example of the first 10 PT pairs " ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#what molecules exist? \n", "cur.execute('SELECT molecule FROM molecular')\n", "print(np.unique(cur.fetchall()))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# grab the opacity at a specific temp and pressure \n", "grab_p = 0.1 # bar \n", "grab_t = 100 # kelvin \n", "import math\n", "\n", "#here's a little code to get out the correct pair (so we dont have to worry about getting the exact number right)\n", "ind_pt = [min(pt_pairs, key=lambda c: math.hypot(c[1]- coordinate[0], c[2]-coordinate[1]))[0] \n", " for coordinate in zip([grab_p],[grab_t])]\n", "\n", "cur.execute(\"\"\"SELECT molecule,ptid,opacity \n", " FROM molecular \n", " WHERE molecule = ?\n", " AND ptid = ?\"\"\",('H2O',ind_pt[0]))\n", "data= cur.fetchall()\n", "data #gives you the molecule, ptid, and the opacity" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "grab_moles = ['H2O','CO2']\n", "grab_p = [0.1,1,100] # bar \n", "grab_t = [100,200,700] # kelvin \n", "\n", "#here's a little code to get out the correct pair (so we dont have to worry about getting the exact number right)\n", "ind_pt = [min(pt_pairs, key=lambda c: math.hypot(c[1]- coordinate[0], c[2]-coordinate[1]))[0] \n", " for coordinate in zip(grab_p,grab_t)]\n", "\n", "cur.execute(\"\"\"SELECT molecule,ptid,opacity \n", " FROM molecular \n", " WHERE molecule in {}\n", " AND ptid in {}\"\"\".format(str(tuple(grab_moles)), str(tuple(ind_pt))))\n", "data= cur.fetchall()\n", "data #gives you the molecule, ptid, and the opacity" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#Dont forget to close your connection!!!!\n", "conn.close()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Creating a New Database from Scratch\n", "\n", "**Note on molecule names**: Because ``picaso`` uses dict formatting to handle opacities, users can easily swap in different molecules. \n", "\n", "For example, if I wanted to include CO2-H2 CIA absorption, I can add ``CO2H2`` to the molecules list below. However, it is only quasi-automated in this regaurd. Please contact natasha.e.batalha@gmail.com if you are adding new CIA to the code. \n", "\n", "**Exceptions**: The exceptions to this are non-CIA continuum opacities. Right now, the other sources of continuum enabled are ``H2-``, ``H-bf`` and ``H-ff`` which have odd-ball formatting since they aren't simple two molecules. _Please let me know if you want to see another continuum source added_.\n", "\n", "**Careful** with case sensitive molecules like **TiO**, **Na**. Make sure you get these right. " ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "db_filename = '/data/picaso_dbs/new_fake_opacity.db'\n", "conn = sqlite3.connect(db_filename, detect_types=sqlite3.PARSE_DECLTYPES)\n", "#same story with bytes and arrays\n", "def adapt_array(arr):\n", " out = io.BytesIO()\n", " np.save(out, arr)\n", " out.seek(0)\n", " return sqlite3.Binary(out.read())\n", "\n", "def convert_array(text):\n", " out = io.BytesIO(text)\n", " out.seek(0)\n", " return np.load(out)\n", "\n", "#tell sqlite what to do with an array\n", "sqlite3.register_adapter(np.ndarray, adapt_array)\n", "sqlite3.register_converter(\"array\", convert_array)\n", "\n", "cur = conn.cursor()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Build Tables: header, continuum, and molecular tables\n", "\n", "It is **VERY** important that these tables are structured the same way. If you think something should be edited, ping natasha.e.batalha@gmail.com" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#header\n", "command=\"\"\"DROP TABLE IF EXISTS header;\n", "CREATE TABLE header (\n", " id INTEGER PRIMARY KEY,\n", " pressure_unit VARCHAR,\n", " temperature_unit VARCHAR,\n", " wavenumber_grid array,\n", " continuum_unit VARCHAR,\n", " molecular_unit VARCHAR\n", " );\"\"\"\n", "\n", "cur.executescript(command)\n", "conn.commit()\n", "#molecular data table, note the existence of PTID which will be very important\n", "command = \"\"\"DROP TABLE IF EXISTS molecular;\n", "CREATE TABLE molecular (\n", " id INTEGER PRIMARY KEY,\n", " ptid INTEGER,\n", " molecule VARCHAR ,\n", " pressure FLOAT,\n", " temperature FLOAT,\n", " opacity array);\"\"\"\n", "\n", "cur.executescript(command)\n", "conn.commit()\n", "#continuum data table\n", "command = \"\"\"DROP TABLE IF EXISTS continuum;\n", "CREATE TABLE continuum (\n", " id INTEGER PRIMARY KEY,\n", " molecule VARCHAR ,\n", " temperature FLOAT,\n", " opacity array);\"\"\"\n", "\n", "cur.executescript(command)\n", "conn.commit() #this commits the changes to the database" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Insert header info (unit and wave grid info!)\n", "\n", "The units **MUST** be the same. The wave grid can be whatever as long as it's consistent between continuum and molecular tables." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "wave_grid = np.linspace(1e4/2, 1e4/0.5, 1000) #fake inverse cm wavenumber grid\n", "\n", "cur.execute('INSERT INTO header (pressure_unit, temperature_unit, wavenumber_grid, continuum_unit,molecular_unit) values (?,?,?,?,?)', \n", " ('bar','kelvin', np.array(wave_grid), 'cm-1 amagat-2', 'cm2/molecule'))\n", "conn.commit()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Insert continuum opacity to database" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "cia_temperature_grid = [100,300,500,700]\n", "#insert continuum\n", "for mol in ['H2H2', 'H2He', 'H2H', 'H2CH4', 'H2N2','H2-', 'H-bf', 'H-ff']:\n", " for T in cia_temperature_grid:\n", " OPACITY = wave_grid *0 + 1e-33 #INSERT YOUR OPACITY HERE\n", " cur.execute('INSERT INTO continuum (molecule, temperature, opacity) values (?,?,?)', (mol,float(T), OPACITY))\n", " conn.commit()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Insert molecular opacity to database\n", "\n", "Again, make sure that your molecules are **case-sensitive**: e.g. Sodium should be `Na` not `NA`" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#create a fake PT grid \n", "pts=[]\n", "for temp in [100,200,400]:\n", " for pres in [0.1, 1, 100]:\n", " pts += [[temp,pres]]\n", "pts" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#insert molecular\n", "for mol in ['H2O','CO2','CH4']:\n", " i = 1 #NOTE THIS INDEX HERE IS CRUCIAL! It will be how we quickly locate opacities \n", " for T,P in pts:\n", " OPACITY = wave_grid *0 + 1e-33 #INSERT YOUR OPACITY HERE\n", " cur.execute('INSERT INTO molecular (ptid, molecule, temperature, pressure,opacity) values (?,?,?,?,?)', (i,mol,float(T),float(P), OPACITY))\n", " conn.commit()\n", " i+=1" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#ALL DONE!!!\n", "conn.close()" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.5" }, "varInspector": { "cols": { "lenName": 16, "lenType": 16, "lenVar": 40 }, "kernels_config": { "python": { "delete_cmd_postfix": "", "delete_cmd_prefix": "del ", "library": "var_list.py", "varRefreshCmd": "print(var_dic_list())" }, "r": { "delete_cmd_postfix": ") ", "delete_cmd_prefix": "rm(", "library": "var_list.r", "varRefreshCmd": "cat(var_dic_list()) " } }, "types_to_exclude": [ "module", "function", "builtin_function_or_method", "instance", "_Feature" ], "window_display": false } }, "nbformat": 4, "nbformat_minor": 4 }