Skip to content
Snippets Groups Projects
Commit 0a9a31ce authored by Thomas Uher's avatar Thomas Uher
Browse files

algaebase

parent 7d6ad783
No related branches found
No related tags found
No related merge requests found
# do not add custom to installed sources - custom taxonomy is writeable and not managed by this router
INSTALLED_SOURCES = ['col']
INSTALLED_SOURCES = ['col', 'algaebase']
'''
in general, django has read and write access to the database during development
......@@ -33,7 +33,7 @@ class TaxonomyRouter:
"""
if app_label in INSTALLED_SOURCES:
if db == 'taxonomy_db':
return False
return True
return False
......
......@@ -34,17 +34,34 @@ class TaxonSearch(object):
self.kwargs = kwargs
def make_custom_queries(self):
    """Build the three search querysets for the custom taxonomy source.

    Sets ``exact_matches_query`` and ``matches_query`` (both filtering
    ``TaxonTreeModel`` on ``taxon_latname``) and ``vernacular_query``
    (filtering ``TaxonLocaleModel`` on ``language`` and ``name``).
    The search text is upper-cased before it is passed to the lookups.
    """
    needle = self.searchtext.upper()

    tree_taxa = self.models.TaxonTreeModel.objects
    self.exact_matches_query = tree_taxa.filter(taxon_latname__iexact=needle)
    self.matches_query = tree_taxa.filter(taxon_latname__istartswith=needle)

    locale_names = self.models.TaxonLocaleModel.objects
    self.vernacular_query = locale_names.filter(
        language=self.language,
        name__icontains=needle,
    )
# do not apply limits here, because queries cannot be filtered after slicing
def make_queries(self):
self.exact_matches_query = self.models.TaxonNamesModel.objects.filter(
language__in=['la', self.language], name__iexact=self.searchtext.upper())
if self.taxon_source == 'taxonomy.sources.custom':
self.make_custom_queries()
self.matches_query = self.models.TaxonNamesModel.objects.filter(
language__in=['la', self.language], name__istartswith=self.searchtext.upper())
else:
self.vernacular_query = self.models.TaxonNamesModel.objects.filter(
language=self.language, name__icontains=self.searchtext.upper())
self.exact_matches_query = self.models.TaxonNamesModel.objects.filter(
language__in=['la', self.language], name__iexact=self.searchtext.upper())
self.matches_query = self.models.TaxonNamesModel.objects.filter(
language__in=['la', self.language], name__istartswith=self.searchtext.upper())
self.vernacular_query = self.models.TaxonNamesModel.objects.filter(
language=self.language, name__icontains=self.searchtext.upper())
self.queries_ready = True
......@@ -71,14 +88,20 @@ class TaxonSearch(object):
lazy_taxon = LazyTaxon(**taxon_kwargs)
if name.name_type == 'accepted name':
label = '{0}'.format(name.name)
elif name.name_type == 'synonym':
label = '{0} (syn. {1})'.format(name.taxon_latname, name.name)
if self.taxon_source == 'taxonomy.sources.custom':
label = name.taxon_latname
else:
if name.name_type == 'accepted name':
label = '{0}'.format(name.name)
elif name.name_type == 'synonym':
label = '{0} (syn. {1})'.format(name.taxon_latname, name.name)
elif name.name_type == 'vernacular':
label = '{0} ({1})'.format(name.name, name.taxon_latname)
elif name.name_type == 'vernacular':
label = '{0} ({1})'.format(name.name, name.taxon_latname)
obj = lazy_taxon.as_typeahead_choice(label=label)
......
......@@ -24,13 +24,16 @@ from django.db import transaction
# Python 3
from html.parser import HTMLParser
import logging
'''
base
'''
nuid36 = ['0','1','2','3','4','5','6','7','8','9',
'a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z']
'a','b','c','d','e','f','g','h','i','j','k','l','m',
'n','o','p','q','r','s','t','u','v','w','x','y','z']
def d2n(integer):
......@@ -509,7 +512,13 @@ class TreeCache:
def _make_cache_entry(self, parent_taxon, children):
# sort children by latname
children.sort(key=lambda taxon: taxon.latname)
return {'parent_taxon' : parent_taxon, 'children' : children}
cache_entry = {
'parent_taxon' : parent_taxon,
'children' : children
}
return cache_entry
'''
find_level only uses children, not parent_taxon
......@@ -526,6 +535,8 @@ class TreeCache:
if child.source_id == source_taxon.source_id:
return level_index
return None
'''
......@@ -549,6 +560,7 @@ class TaxonSourceManager:
# the classes are only used by manager specific methods
# the manager independent methods use only instances of these classes
SourceTreeTaxonClass = None
SourceSynonymTaxonClass = None
# classes and instances only used by methods of the independent manager
TaxonTreeModel = None
......@@ -558,6 +570,8 @@ class TaxonSourceManager:
# caching
TreeCacheClass = None
source_name = None
# the subclass needs to implement the taxon models
def __init__(self):
......@@ -571,6 +585,26 @@ class TaxonSourceManager:
self.first_run = False
self.logger = self._get_logger()
def _get_logger(self):
logger = logging.getLogger(self.source_name)
logging_folder = '/var/log/lc-taxonomy/'
if not os.path.isdir(logging_folder):
os.makedirs(logging_folder)
logfile_path = os.path.join(logging_folder, 'TaxonSourceManager')
hdlr = logging.FileHandler(logfile_path)
formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
hdlr.setFormatter(formatter)
logger.addHandler(hdlr)
logger.setLevel(logging.INFO)
return logger
def _get_root_source_taxa(self):
raise NotImplementedError('Tree Managers need a _get_root_source_taxa method')
......@@ -584,30 +618,6 @@ class TaxonSourceManager:
return root_taxa
'''
get all children of a tree entry
'''
def _get_children(self, source_taxon):
raise NotImplementedError('Tree Managers need a _get_children method')
'''
this function has to travel right and up until it has found the next parent taxon
which has not been climbed down yet
returns a source_taxon or None
EXAMPLE:
- source_taxon is a taxon without children
- the next sibling of source taxon might have children -> check all siblings first
- if no siblings have children, travel up
'''
def _get_next_sibling(self, source_taxon):
raise NotImplementedError('Tree Managers need a _get_next_sibling method')
# travel one up
def _get_parent(self, source_taxon):
raise NotImplementedError('Tree Managers need a _get_parent method')
'''
save the taxon and all its vernacular names into the LocalCosmos specific *Taxon* Tables
should be the same for all sources
......@@ -739,6 +749,10 @@ class TaxonSourceManager:
# the last_child as a SourceTreeTaxon instance either has a nuid set - or can fetch the nuid from the target db
last_child = self._climb_down(start_taxon)
message = 'last_child: {0}, nuid: {1}'.format(last_child.latname, last_child.get_nuid())
print(message)
self.logger.info(message)
# search siblings of this childless taxon, or parent siblings if no siblings available
# start_taxon is always the last child, which is the first child of the last group of children
# climbing up always uses the new tree - but the taxa it walks already have been saved and have nuids
......@@ -753,7 +767,9 @@ class TaxonSourceManager:
if next_parent:
start_taxon = next_parent
# print('starting nuid (next_parent): %s' % start_taxon.get_nuid())
message = 'starting nuid (next_parent): {0}'.format(start_taxon.get_nuid())
print(message)
self.logger.info(message)
else:
continue_climbing = False
......@@ -874,9 +890,9 @@ class TaxonSourceManager:
existing_children_map = {}
for taxon in existing_children_query:
existing_children.append({'author': taxon.author, 'latname': taxon.taxon_latname})
existing_children.append({'author': taxon.taxon_author, 'latname': taxon.taxon_latname})
author = taxon.author
author = taxon.taxon_author
if not author:
author = 'None'
key = " ".join([taxon.taxon_latname, author])
......@@ -891,7 +907,7 @@ class TaxonSourceManager:
# the taxon already exists in the tree
# check synonyms for existing taxa
existing_taxon = existing_children_query.filter(taxon_latname=child.latname,
author=child.author)
taxon_author=child.author)
if len(existing_taxon) > 1:
raise ValueError('Found more than one child with the same latname/author combination in the children group: %s' % child.latname)
......@@ -1023,6 +1039,8 @@ class TaxonSourceManager:
def _climb_down(self, parent_taxon):
self.logger.info('climb down: {0}'.format(parent_taxon.latname))
# if no nuid is found, it might be a duplicate
is_duplicate = self._check_taxon_duplicate(parent_taxon)
if is_duplicate:
......@@ -1048,7 +1066,12 @@ class TaxonSourceManager:
parent_taxon = children[0]
first_child = children[0]
self.logger.info('first child: {0}'.format(first_child.latname))
else:
self.logger.info('no more children found for: {0}'.format(parent_taxon.latname))
if self.first_run == False:
# check if there are children in the database that need to be deleted
# print("parent: %s %s" % (parent_taxon.latname, parent_taxon.get_nuid()))
......@@ -1063,6 +1086,8 @@ class TaxonSourceManager:
# if a parent_taxon that has no children is passed, return the parent taxon
# the tree climber will then go to the next sibling - as it would with the first_child
if first_child is None:
self.logger.info('climb down returning: {0}'.format(parent_taxon.latname))
return parent_taxon
self.logger.info('climb down: {0}'.format(first_child.latname))
return first_child
This diff is collapsed.
File added
# Generated by Django 3.1 on 2020-11-05 09:50
from django.db import migrations, models
import django.db.models.deletion
import uuid
class Migration(migrations.Migration):
    """Initial migration of the ``algaebase`` app.

    Declares four models: ``AlgaebaseTaxonNamesView`` (unmanaged),
    ``AlgaebaseTaxonTree``, ``AlgaebaseTaxonSynonym`` and
    ``AlgaebaseTaxonLocale``. Synonyms and locales point at a tree entry
    through the tree's ``name_uuid`` column rather than its primary key.
    """

    initial = True

    dependencies = [
    ]

    operations = [
        # Unmanaged model ('managed': False): Django keeps it in the migration
        # state but does not create or alter a table for it.
        # NOTE(review): the name suggests a database view that has to be
        # created outside of this migration - confirm.
        migrations.CreateModel(
            name='AlgaebaseTaxonNamesView',
            fields=[
                ('name_uuid', models.UUIDField(editable=False, primary_key=True, serialize=False, unique=True)),
                # NOTE(review): max_length=355 differs from the 255 used for
                # taxon_latname in the other models below - confirm it is intended.
                ('taxon_latname', models.CharField(max_length=355)),
                ('taxon_author', models.CharField(max_length=100, null=True)),
                ('taxon_nuid', models.CharField(max_length=255)),
                ('name', models.CharField(max_length=255)),
                ('language', models.CharField(max_length=5, null=True)),
                ('name_type', models.CharField(choices=[('taxontree', 'TaxonTree'), ('synonym', 'TaxonSynonym'), ('locale', 'TaxonLocale')], max_length=100)),
                ('rank', models.CharField(max_length=255, null=True)),
            ],
            options={
                'abstract': False,
                'managed': False,
            },
        ),
        # The taxon tree itself; taxon_nuid, source_id and name_uuid are unique.
        migrations.CreateModel(
            name='AlgaebaseTaxonTree',
            fields=[
                ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('name_uuid', models.UUIDField(default=uuid.uuid4, editable=False, unique=True)),
                ('taxon_latname', models.CharField(max_length=255)),
                ('taxon_author', models.CharField(max_length=255, null=True)),
                ('taxon_nuid', models.CharField(max_length=255, unique=True)),
                ('rank', models.CharField(max_length=255, null=True)),
                ('is_root_taxon', models.BooleanField(default=False)),
                ('slug', models.SlugField(max_length=100, null=True, unique=True)),
                ('source_id', models.CharField(max_length=255, unique=True)),
                ('additional_data', models.JSONField(null=True)),
                # self-referencing, nullable parent; PROTECT blocks deleting a
                # taxon that is still referenced as parent
                ('parent', models.ForeignKey(null=True, on_delete=django.db.models.deletion.PROTECT, to='algaebase.algaebasetaxontree')),
            ],
            options={
                'abstract': False,
            },
        ),
        # Synonyms of a tree taxon; deleted together with the taxon (CASCADE).
        migrations.CreateModel(
            name='AlgaebaseTaxonSynonym',
            fields=[
                ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('name_uuid', models.UUIDField(default=uuid.uuid4, editable=False, unique=True)),
                ('taxon_latname', models.CharField(max_length=255)),
                ('taxon_author', models.CharField(max_length=255, null=True)),
                ('slug', models.SlugField(max_length=100, null=True, unique=True)),
                # unlike the tree model, source_id is not unique here
                ('source_id', models.CharField(max_length=255)),
                ('additional_data', models.JSONField(null=True)),
                # references the tree's name_uuid column instead of its primary key
                ('taxon', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='algaebase.algaebasetaxontree', to_field='name_uuid')),
            ],
            options={
                # one synonym per (taxon, latname, author) combination
                'unique_together': {('taxon', 'taxon_latname', 'taxon_author')},
            },
        ),
        # Localized names of a tree taxon; deleted together with the taxon (CASCADE).
        migrations.CreateModel(
            name='AlgaebaseTaxonLocale',
            fields=[
                ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('name_uuid', models.UUIDField(default=uuid.uuid4, editable=False, unique=True)),
                ('name', models.CharField(max_length=255)),
                ('language', models.CharField(max_length=2)),
                ('iso6392', models.CharField(max_length=3, null=True)),
                ('language_region', models.CharField(max_length=5, null=True)),
                ('preferred', models.BooleanField(default=False)),
                ('slug', models.SlugField(max_length=100, null=True, unique=True)),
                ('additional_data', models.JSONField(null=True)),
                # references the tree's name_uuid column instead of its primary key
                ('taxon', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='algaebase.algaebasetaxontree', to_field='name_uuid')),
            ],
            options={
                # composite index over (taxon, language)
                'index_together': {('taxon', 'language')},
            },
        ),
    ]
......@@ -15,7 +15,7 @@ class AlgaebaseTaxonSynonym(TaxonSynonym):
class AlgaebaseTaxonLocale(TaxonLocale):
taxon = models.ForeignKey(algaebaseTaxonTree, on_delete=models.CASCADE, to_field='name_uuid')
taxon = models.ForeignKey(AlgaebaseTaxonTree, on_delete=models.CASCADE, to_field='name_uuid')
class Meta:
index_together = [
......
{"current_root_taxon": {"latname": "Acritarcha phylum incertae sedis", "author": null, "rank": "phylum", "source": "algaebase2020", "source_id": "phylum_Acritarcha phylum incertae sedis_None_root", "kwargs": {"nuid": "001"}}, "last_parent": {"latname": "Zostera muelleri", "author": "Irmisch ex Ascherson 1867", "rank": "species", "source": "algaebase2020", "source_id": "21522", "kwargs": {"nuid": "00z001001001006002008"}}, "last_saved_child": {"latname": "Zostera muelleri var. novazelandica", "author": "(Setchell) S.W.L.Jacobs 2006", "rank": "variety", "source": "algaebase2020", "source_id": "170274", "kwargs": {"nuid": "00z001001001006002008003"}}, "timestamp": 1605302974}
\ No newline at end of file
......@@ -296,7 +296,7 @@ class ColTaxonSourceManager(TaxonSourceManager):
break
if DEBUG == True:
print('_get_next_sibling end, found %s' %(sibling.latname))
print('_get_next_sibling end, found {0}'.format(sibling.latname))
return sibling
......@@ -315,7 +315,7 @@ class ColTaxonSourceManager(TaxonSourceManager):
colCursor.execute('''SELECT * FROM taxon WHERE "taxonID" = %s ''',
[db_taxon['parentNameUsageID'],])
db_parent = colCursor.fetone()
db_parent = colCursor.fetchone()
if db_parent:
parent = self._sourcetaxon_from_db_taxon(db_parent)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment