diff --git a/Makefile b/Makefile
index 9507c28..0598120 100644
--- a/Makefile
+++ b/Makefile
@@ -92,6 +92,7 @@ data: requirements
$(PYTHON_INTERPRETER) src/data/add_breed.py --species_class sheep --name Texel --code TEX --alias TEXEL_UY --dataset TEXEL_INIA_UY.zip
$(PYTHON_INTERPRETER) src/data/add_breed.py --species_class sheep --name Frizarta --code FRZ --alias 0 --dataset Frizarta54samples_ped_map_files.zip
$(PYTHON_INTERPRETER) src/data/add_breed.py --species_class sheep --name Merino --code MER --alias MERINO_UY --dataset MERINO_INIA_UY.zip
+ $(PYTHON_INTERPRETER) src/data/add_breed.py --species_class sheep --name Merino --code MER --alias MER --dataset MERINO_INIA_UY.zip
$(PYTHON_INTERPRETER) src/data/add_breed.py --species_class sheep --name Corriedale --code CRR --alias CORRIEDALE_UY --dataset CORRIEDALE_INIA_UY.zip
$(PYTHON_INTERPRETER) src/data/add_breed.py --species_class sheep --name Creole --code CRL --alias CRL --dataset CREOLE_INIA_UY.zip
$(PYTHON_INTERPRETER) src/data/add_breed.py --species_class sheep --name "Mérinos d'Arles" --code ARL --alias MER --dataset="High density genotypes of French Sheep populations.zip"
@@ -196,9 +197,9 @@ data: requirements
$(foreach ASSEMBLY, $(SHEEP_ASSEMBLIES), $(PYTHON_INTERPRETER) src/data/import_from_plink.py --file Frizarta54samples_ped_map_files/Frizarta54samples \
--dataset Frizarta54samples_ped_map_files.zip --src_coding forward --chip_name IlluminaOvineSNP50 \
--assembly $(ASSEMBLY) --create_samples;)
- $(foreach ASSEMBLY, $(SHEEP_ASSEMBLIES), $(PYTHON_INTERPRETER) src/data/import_from_plink.py --file MERINO_UY_96_21_12_17_OV54k \
- --dataset MERINO_INIA_UY.zip --chip_name IlluminaOvineSNP50 \
- --assembly $(ASSEMBLY) --create_samples;)
+ $(PYTHON_INTERPRETER) src/data/import_samples.py --src_dataset MERINO_INIA_UY.zip \
+ --datafile MERINO_UY_96_21_12_17_OV54k_samples.xlsx --code_column code --id_column iid \
+ --chip_name IlluminaOvineSNP50 --country_column country
$(foreach ASSEMBLY, $(SHEEP_ASSEMBLIES), $(PYTHON_INTERPRETER) src/data/import_from_plink.py --file CORRIEDALE_UY_60_INIA_Ovine_14sep2010 \
--dataset CORRIEDALE_INIA_UY.zip --chip_name IlluminaOvineSNP50 \
--assembly $(ASSEMBLY) --create_samples;)
@@ -336,6 +337,8 @@ data: requirements
--chip_name IlluminaOvineSNP50 --country_column Country
## convert genotypes without creating samples in database (SHEEP)
+ $(foreach ASSEMBLY, $(SHEEP_ASSEMBLIES), $(PYTHON_INTERPRETER) src/data/import_from_plink.py --file MERINO_UY_96_21_12_17_OV54k \
+ --dataset MERINO_INIA_UY.zip --chip_name IlluminaOvineSNP50 --assembly $(ASSEMBLY);)
$(foreach ASSEMBLY, $(SHEEP_ASSEMBLIES), $(PYTHON_INTERPRETER) src/data/import_from_affymetrix.py --prefix Affymetrix_data_Plate_652_660/Affymetrix_data_Plate_652/Affymetrix_data_Plate_652 \
--dataset Affymetrix_data_Plate_652_660.zip --breed_code CRR --chip_name AffymetrixAxiomOviCan --assembly $(ASSEMBLY) --sample_field alias \
--src_version Oar_v4.0 --src_imported_from affymetrix;)
diff --git a/notebooks/exploratory/0.9.0-bunop-describe-MERINO_INIA.ipynb b/notebooks/exploratory/0.9.0-bunop-describe-MERINO_INIA.ipynb
index 2710a70..79eb57c 100644
--- a/notebooks/exploratory/0.9.0-bunop-describe-MERINO_INIA.ipynb
+++ b/notebooks/exploratory/0.9.0-bunop-describe-MERINO_INIA.ipynb
@@ -17,15 +17,15 @@
"outputs": [],
"source": [
"import io\n",
- "import csv\n",
"import itertools\n",
+ "import pandas as pd\n",
"\n",
- "from collections import Counter\n",
"from zipfile import ZipFile\n",
- "from pathlib import Path\n",
"\n",
"import src.features.illumina\n",
- "from src.features.smarterdb import VariantSheep, global_connection"
+ "from src.features.smarterdb import VariantSheep, global_connection, Dataset, Breed\n",
+ "from src.features.utils import get_project_dir\n",
+ "from src.features.plinkio import TextPlinkIO"
]
},
{
@@ -35,7 +35,7 @@
"metadata": {},
"outputs": [],
"source": [
- "project_dir = Path.cwd().parents[1]\n",
+ "project_dir = get_project_dir()\n",
"datafile = project_dir / \"data/raw/background/MERINO_INIA_UY.zip\""
]
},
@@ -128,12 +128,32 @@
"execution_count": 6,
"id": "upper-judgment",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Skipping: Illumina, Inc.,,,,,,,,,,,,,,,,,,,\n",
+ "Skipping: [Heading],,,,,,,,,,,,,,,,,,,,\n",
+ "Skipping: Descriptor File Name,OvineSNP50_B.bpm,,,,,,,,,,,,,,,,,,,\n",
+ "Skipping: Assay Format,Infinium HD Ultra,,,,,,,,,,,,,,,,,,,\n",
+ "Skipping: Date Manufactured,1/7/2009,,,,,,,,,,,,,,,,,,,\n",
+ "Skipping: Loci Count ,54241,,,,,,,,,,,,,,,,,,,\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Skipping: [Assay],,,,,,,,,,,,,,,,,,,,\n"
+ ]
+ }
+ ],
"source": [
"chip_dir = project_dir / \"data/external/SHE/ILLUMINA/\"\n",
- "old_chip3_file = chip_dir / \"ovinesnp50_b.csv\"\n",
+ "old_chip3_file = chip_dir / \"ovinesnp50_b.csv.gz\"\n",
"old_chip3 = dict()\n",
- "for record in src.features.illumina.read_Manifest(old_chip3_file):\n",
+ "for record in src.features.illumina.read_Manifest(old_chip3_file, delimiter=\",\"):\n",
" old_chip3[record.name] = (record.chr, record.mapinfo)"
]
},
@@ -160,15 +180,15 @@
"missing = 0\n",
"\n",
"for key, value in old_chip3.items():\n",
- " if not key in data_coordinates:\n",
+ " if key not in data_coordinates:\n",
" missing += 1\n",
" continue\n",
- " \n",
+ "\n",
" if value != data_coordinates[key]:\n",
" count += 1\n",
" if count <= 10:\n",
" print(key, value, data_coordinates[key])\n",
- " \n",
+ "\n",
"print(f\"\\nN of SNPs in different positions in merino and old chip3: {count}\")\n",
"print(f\"\\nN of SNPs in merino not in chip: {missing}\")"
]
@@ -196,11 +216,11 @@
"snp 250506CS3900176800001_906.1 with different positions: 7:89002990 <> 7:81648528\n",
"snp 250506CS3900211600001_1041.1 with different positions: 16:44955568 <> 16:41355381\n",
"snp 250506CS3900218700001_1294.1 with different positions: 2:157820235 <> 2:148802744\n",
- "snp 250506CS3900283200001_442.1 with different positions: 1:203289635 <> 99:0\n",
+ "snp 250506CS3900283200001_442.1 with different positions: 1:203289635 <> 0:0\n",
"snp 250506CS3900371000001_1255.1 with different positions: 11:37632867 <> 11:35339123\n",
"snp 250506CS3900386000001_696.1 with different positions: 16:68297712 <> 16:62646307\n",
"snp 250506CS3900414400001_1178.1 with different positions: 1:111100644 <> 1:103396552\n",
- "snp 250506CS3900435700001_1658.1 with different positions: 12:50140951 <> 99:0\n",
+ "snp 250506CS3900435700001_1658.1 with different positions: 12:50140951 <> 0:0\n",
"snp 250506CS3900464100001_519.1 with different positions: 1:91075445 <> 1:85767398\n",
"snp 250506CS3900487100001_1521.1 with different positions: 14:1552575 <> 14:1110363\n",
"snp 250506CS3900539000001_471.1 with different positions: X:74622875 <> X:115765957\n",
@@ -209,8 +229,7 @@
"snp CL635241_413.1 with different positions: 3:196207011 <> 3:182202867\n",
"snp CL635750_128.1 with different positions: 3:242198228 <> 3:223741135\n",
"snp CL635944_160.1 with different positions: 0:0 <> 6:114778683\n",
- "snp Contig35697_5761.1 with different positions: 0:0 <> 6:18835475\n",
- "snp CR_594.1 with different positions: 0:0 <> 99:0\n"
+ "snp Contig35697_5761.1 with different positions: 0:0 <> 6:18835475\n"
]
}
],
@@ -218,7 +237,7 @@
"global_connection()\n",
"for key, value in itertools.islice(data_coordinates.items(), 20):\n",
" qs = VariantSheep.objects(name=key)\n",
- " \n",
+ "\n",
" if qs.count() > 0:\n",
" variant = qs.get()\n",
" location = next(filter(lambda loc: loc.imported_from == \"SNPchiMp v.3\", variant.locations))\n",
@@ -226,10 +245,643 @@
" print(f\"snp {key} with different positions: {value[0]}:{value[1]} <> {location.chrom}:{location.position}\")"
]
},
+ {
+ "cell_type": "markdown",
+ "id": "aware-logging",
+ "metadata": {},
+ "source": [
+ "## Fix metadata\n",
+ "\n",
+ "There are some animals which are imported (have GPS coordinates not in uruguay). So read from metadata and write a new metadata samples with the proper country. First, get samples and breed from ped file:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "94f16289",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "dataset = Dataset.objects.get(file=\"MERINO_INIA_UY.zip\")\n",
+ "plinkio = TextPlinkIO(\n",
+ " prefix=str(dataset.working_dir / \"MERINO_UY_96_21_12_17_OV54k\"),\n",
+ " species=dataset.species,\n",
+ " chip_name=dataset.chip_name)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "89133e3c",
+ "metadata": {},
+ "source": [
+ "Now get `fid` and `iid`:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "d6579319",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "[['MERINO_UY', '201711200001'],\n",
+ " ['MERINO_UY', '201711200002'],\n",
+ " ['MERINO_UY', '201711200003'],\n",
+ " ['MERINO_UY', '201711200004'],\n",
+ " ['MERINO_UY', '201711200005'],\n",
+ " ['MERINO_UY', '201711200006'],\n",
+ " ['MERINO_UY', '201711200007'],\n",
+ " ['MERINO_UY', '201711200008'],\n",
+ " ['MERINO_UY', '201711200009'],\n",
+ " ['MERINO_UY', '201711200010']]"
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "samples = [[fid, iid] for fid, iid, *_ in plinkio.read_pedfile()]\n",
+ "samples[:10]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "76efc585",
+ "metadata": {},
+ "source": [
+ "Ok, now I need to open the proper metadata file, in order to select the animal I need:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "f33c25d1",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " ID | \n",
+ " Breed | \n",
+ " Stall | \n",
+ " GPS_Coordinates | \n",
+ " GPS_2 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 1 | \n",
+ " Corriedale | \n",
+ " CIEDAG | \n",
+ " -33.86937579589417, -55.57265008365528 | \n",
+ " https://www.google.com/maps/place/CIEDAG+-+Sec... | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 10 | \n",
+ " Corriedale | \n",
+ " CIEDAG | \n",
+ " -33.86937579589417, -55.57265008365528 | \n",
+ " https://www.google.com/maps/place/CIEDAG+-+Sec... | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 11 | \n",
+ " Corriedale | \n",
+ " CIEDAG | \n",
+ " -33.86937579589417, -55.57265008365528 | \n",
+ " https://www.google.com/maps/place/CIEDAG+-+Sec... | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 12 | \n",
+ " Corriedale | \n",
+ " CIEDAG | \n",
+ " -33.86937579589417, -55.57265008365528 | \n",
+ " https://www.google.com/maps/place/CIEDAG+-+Sec... | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 13 | \n",
+ " Corriedale | \n",
+ " CIEDAG | \n",
+ " -33.86937579589417, -55.57265008365528 | \n",
+ " https://www.google.com/maps/place/CIEDAG+-+Sec... | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " ID Breed Stall GPS_Coordinates \\\n",
+ "0 1 Corriedale CIEDAG -33.86937579589417, -55.57265008365528 \n",
+ "1 10 Corriedale CIEDAG -33.86937579589417, -55.57265008365528 \n",
+ "2 11 Corriedale CIEDAG -33.86937579589417, -55.57265008365528 \n",
+ "3 12 Corriedale CIEDAG -33.86937579589417, -55.57265008365528 \n",
+ "4 13 Corriedale CIEDAG -33.86937579589417, -55.57265008365528 \n",
+ "\n",
+ " GPS_2 \n",
+ "0 https://www.google.com/maps/place/CIEDAG+-+Sec... \n",
+ "1 https://www.google.com/maps/place/CIEDAG+-+Sec... \n",
+ "2 https://www.google.com/maps/place/CIEDAG+-+Sec... \n",
+ "3 https://www.google.com/maps/place/CIEDAG+-+Sec... \n",
+ "4 https://www.google.com/maps/place/CIEDAG+-+Sec... "
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "metadata = Dataset.objects.get(file=\"Smarter_Ids_Uploaded_with_GPSCordinates_FINAL.zip\")\n",
+ "df = pd.read_excel(metadata.working_dir / \"Smarter_Ids_Uploaded_with_GPSCordinates_FINAL.xlsx\")\n",
+ "df.head()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "f68fd0af",
+ "metadata": {},
+ "source": [
+ "Ok now I need to select from metadata the sample I have in this dataset:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "id": "3998bd90",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " fid | \n",
+ " iid | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " MERINO_UY | \n",
+ " 201711200001 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " MERINO_UY | \n",
+ " 201711200002 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " MERINO_UY | \n",
+ " 201711200003 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " MERINO_UY | \n",
+ " 201711200004 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " MERINO_UY | \n",
+ " 201711200005 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " fid iid\n",
+ "0 MERINO_UY 201711200001\n",
+ "1 MERINO_UY 201711200002\n",
+ "2 MERINO_UY 201711200003\n",
+ "3 MERINO_UY 201711200004\n",
+ "4 MERINO_UY 201711200005"
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "samples = pd.DataFrame(samples, columns=[\"fid\", \"iid\"])\n",
+ "samples.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "id": "b651383f",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Number of samples in the metadata: 96\n",
+ "Number of samples in the plink file: 96\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " ID | \n",
+ " Breed | \n",
+ " Stall | \n",
+ " GPS_Coordinates | \n",
+ " GPS_2 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 158 | \n",
+ " 201711200065 | \n",
+ " Merino | \n",
+ " EEFAS | \n",
+ " -31.38766026946753, -57.71601394419916 | \n",
+ " https://earth.google.com/web/search/Estacion+E... | \n",
+ "
\n",
+ " \n",
+ " 159 | \n",
+ " 201711200066 | \n",
+ " Merino | \n",
+ " EEFAS | \n",
+ " -31.38766026946753, -57.71601394419916 | \n",
+ " https://earth.google.com/web/search/Estacion+E... | \n",
+ "
\n",
+ " \n",
+ " 160 | \n",
+ " 201711200005 | \n",
+ " Merino | \n",
+ " IMPORTADO | \n",
+ " -30.945528469563605, 151.2477073694806 | \n",
+ " https://www.google.com/maps/place/Nerstane+Mer... | \n",
+ "
\n",
+ " \n",
+ " 161 | \n",
+ " 201711200012 | \n",
+ " Merino | \n",
+ " IMPORTADO | \n",
+ " -30.945528469563605, 151.2477073694806 | \n",
+ " https://www.google.com/maps/place/Nerstane+Mer... | \n",
+ "
\n",
+ " \n",
+ " 162 | \n",
+ " 201711200013 | \n",
+ " Merino | \n",
+ " IMPORTADO | \n",
+ " -30.945528469563605, 151.2477073694806 | \n",
+ " https://www.google.com/maps/place/Nerstane+Mer... | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " ID Breed Stall GPS_Coordinates \\\n",
+ "158 201711200065 Merino EEFAS -31.38766026946753, -57.71601394419916 \n",
+ "159 201711200066 Merino EEFAS -31.38766026946753, -57.71601394419916 \n",
+ "160 201711200005 Merino IMPORTADO -30.945528469563605, 151.2477073694806 \n",
+ "161 201711200012 Merino IMPORTADO -30.945528469563605, 151.2477073694806 \n",
+ "162 201711200013 Merino IMPORTADO -30.945528469563605, 151.2477073694806 \n",
+ "\n",
+ " GPS_2 \n",
+ "158 https://earth.google.com/web/search/Estacion+E... \n",
+ "159 https://earth.google.com/web/search/Estacion+E... \n",
+ "160 https://www.google.com/maps/place/Nerstane+Mer... \n",
+ "161 https://www.google.com/maps/place/Nerstane+Mer... \n",
+ "162 https://www.google.com/maps/place/Nerstane+Mer... "
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df['ID'] = df['ID'].astype(str)\n",
+ "df_selected = df[df['ID'].isin(samples['iid'])]\n",
+ "print(f\"Number of samples in the metadata: {len(df_selected)}\")\n",
+ "print(f\"Number of samples in the plink file: {len(samples)}\")\n",
+ "df_selected.head()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "52cd9927",
+ "metadata": {},
+ "source": [
+ "Select the proper code for this breed:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "id": "e9b2a503",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'MER'"
+ ]
+ },
+ "execution_count": 14,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "breed = Breed.objects.get(name=\"Merino\", species=\"Sheep\")\n",
+ "breed.code"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "id": "d7279831",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " fid | \n",
+ " iid | \n",
+ " code | \n",
+ " country | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " MERINO_UY | \n",
+ " 201711200001 | \n",
+ " MER | \n",
+ " Uruguay | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " MERINO_UY | \n",
+ " 201711200002 | \n",
+ " MER | \n",
+ " Uruguay | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " MERINO_UY | \n",
+ " 201711200003 | \n",
+ " MER | \n",
+ " Uruguay | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " MERINO_UY | \n",
+ " 201711200004 | \n",
+ " MER | \n",
+ " Uruguay | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " MERINO_UY | \n",
+ " 201711200005 | \n",
+ " MER | \n",
+ " Uruguay | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " fid iid code country\n",
+ "0 MERINO_UY 201711200001 MER Uruguay\n",
+ "1 MERINO_UY 201711200002 MER Uruguay\n",
+ "2 MERINO_UY 201711200003 MER Uruguay\n",
+ "3 MERINO_UY 201711200004 MER Uruguay\n",
+ "4 MERINO_UY 201711200005 MER Uruguay"
+ ]
+ },
+ "execution_count": 15,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "samples[\"code\"] = breed.code\n",
+ "samples[\"country\"] = \"Uruguay\"\n",
+ "samples.head()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "18c2f44f",
+ "metadata": {},
+ "source": [
+ "Now select all the animals with `IMPORTADO` `Stall` and change country to `Australia`:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "id": "5891432c",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "country\n",
+ "Uruguay 84\n",
+ "Australia 12\n",
+ "Name: count, dtype: int64\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " fid | \n",
+ " iid | \n",
+ " code | \n",
+ " country | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " MERINO_UY | \n",
+ " 201711200001 | \n",
+ " MER | \n",
+ " Uruguay | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " MERINO_UY | \n",
+ " 201711200002 | \n",
+ " MER | \n",
+ " Uruguay | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " MERINO_UY | \n",
+ " 201711200003 | \n",
+ " MER | \n",
+ " Uruguay | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " MERINO_UY | \n",
+ " 201711200004 | \n",
+ " MER | \n",
+ " Uruguay | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " MERINO_UY | \n",
+ " 201711200005 | \n",
+ " MER | \n",
+ " Australia | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " fid iid code country\n",
+ "0 MERINO_UY 201711200001 MER Uruguay\n",
+ "1 MERINO_UY 201711200002 MER Uruguay\n",
+ "2 MERINO_UY 201711200003 MER Uruguay\n",
+ "3 MERINO_UY 201711200004 MER Uruguay\n",
+ "4 MERINO_UY 201711200005 MER Australia"
+ ]
+ },
+ "execution_count": 16,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "samples.loc[samples['iid'].isin(df_selected[df_selected[\"Stall\"] == \"IMPORTADO\"]['ID']), 'country'] = 'Australia'\n",
+ "print(samples[\"country\"].value_counts())\n",
+ "samples.head()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "2e76f209",
+ "metadata": {},
+ "source": [
+ "Write this metadata to a new file:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "id": "e0046755",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "samples.to_excel(metadata.working_dir / \"MERINO_UY_96_21_12_17_OV54k_samples.xlsx\", index=False)"
+ ]
+ },
{
"cell_type": "code",
"execution_count": null,
- "id": "aware-logging",
+ "id": "b9a1de9d",
"metadata": {},
"outputs": [],
"source": []
@@ -237,7 +889,7 @@
],
"metadata": {
"kernelspec": {
- "display_name": "Python 3",
+ "display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
@@ -251,7 +903,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.9.1"
+ "version": "3.12.3"
}
},
"nbformat": 4,