Skip to content

Commit

Permalink
Merge pull request #130 from JSv4/JSv4/better-docs-and-more-modular-p…
Browse files Browse the repository at this point in the history
…arsing

Better Docs and Modular Extract Tasks
  • Loading branch information
JSv4 authored Jun 23, 2024
2 parents 322de35 + de9e149 commit a349ea7
Show file tree
Hide file tree
Showing 53 changed files with 11,362 additions and 7,316 deletions.
3 changes: 3 additions & 0 deletions .idea/misc.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
![OpenContracts](/docs/assets/images/logos/OpenContracts.webp)

![OpenContracts](/docs/assets/images/logos/OS_Legal_Logo.png)
# OpenContracts
## The Free and Open Source Document Analytics Platform

---
Expand Down
16 changes: 1 addition & 15 deletions config/graphql/filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,7 @@
)
from opencontractserver.corpuses.models import Corpus, CorpusQuery
from opencontractserver.documents.models import Document
from opencontractserver.extracts.models import (
Column,
Datacell,
Extract,
Fieldset,
LanguageModel,
)
from opencontractserver.extracts.models import Column, Datacell, Extract, Fieldset
from opencontractserver.users.models import Assignment, UserExport

User = get_user_model()
Expand Down Expand Up @@ -381,14 +375,6 @@ class Meta:
}


class LanguageModelFilter(django_filters.FilterSet):
    """Filter set for LanguageModel records, keyed on the ``model`` field."""

    class Meta:
        model = LanguageModel
        # Allow exact-match and substring ("contains") lookups on `model`.
        fields = {"model": ["exact", "contains"]}


class FieldsetFilter(django_filters.FilterSet):
class Meta:
model = Fieldset
Expand Down
15 changes: 1 addition & 14 deletions config/graphql/graphene_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,7 @@
)
from opencontractserver.corpuses.models import Corpus, CorpusQuery
from opencontractserver.documents.models import Document
from opencontractserver.extracts.models import (
Column,
Datacell,
Extract,
Fieldset,
LanguageModel,
)
from opencontractserver.extracts.models import Column, Datacell, Extract, Fieldset
from opencontractserver.users.models import Assignment, UserExport, UserImport

User = get_user_model()
Expand Down Expand Up @@ -291,13 +285,6 @@ class Meta:
connection_class = CountableConnection


# GraphQL object type backed by the LanguageModel Django model.
# NOTE: documented with comments rather than a docstring on purpose, since a
# graphene type's class docstring is used as its schema description.
class LanguageModelType(AnnotatePermissionsForReadMixin, DjangoObjectType):
    class Meta:
        model = LanguageModel
        # Use the project's connection class for this type's connections.
        connection_class = CountableConnection
        # Expose the type through the Relay Node interface.
        interfaces = [relay.Node]


class ColumnType(AnnotatePermissionsForReadMixin, DjangoObjectType):
class Meta:
model = Column
Expand Down
48 changes: 9 additions & 39 deletions config/graphql/mutations.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@
ExtractType,
FieldsetType,
LabelSetType,
LanguageModelType,
RelationInputType,
RelationshipType,
UserExportType,
Expand All @@ -52,13 +51,7 @@
)
from opencontractserver.corpuses.models import Corpus, CorpusQuery, TemporaryFileHandle
from opencontractserver.documents.models import Document
from opencontractserver.extracts.models import (
Column,
Datacell,
Extract,
Fieldset,
LanguageModel,
)
from opencontractserver.extracts.models import Column, Datacell, Extract, Fieldset
from opencontractserver.tasks import (
build_label_lookups_task,
burn_doc_annotations,
Expand All @@ -77,7 +70,7 @@
package_funsd_exports,
package_langchain_exports,
)
from opencontractserver.tasks.extract_tasks import run_extract
from opencontractserver.tasks.extract_orchestrator_tasks import run_extract
from opencontractserver.tasks.permissioning_tasks import (
make_analysis_public_task,
make_corpus_public_task,
Expand Down Expand Up @@ -1481,25 +1474,6 @@ def resolve(cls, root, info, **kwargs):
return cls(user=info.context.user)


# Mutation that creates a LanguageModel owned by the requesting user and grants
# that user CRUD permissions on it.
# NOTE: documented with comments rather than a docstring on purpose, since a
# graphene mutation's class docstring is used as its schema description.
class CreateLanguageModel(graphene.Mutation):
    class Arguments:
        model = graphene.String(required=True)

    ok = graphene.Boolean()
    message = graphene.String()
    obj = graphene.Field(LanguageModelType)

    @staticmethod
    @login_required
    def mutate(root, info, model):
        requesting_user = info.context.user

        # Persist the new record with the requester recorded as its creator.
        created = LanguageModel(model=model, creator=requesting_user)
        created.save()

        # Give the creator full CRUD rights on the object they just made.
        set_permissions_for_obj_to_user(
            requesting_user, created, [PermissionTypes.CRUD]
        )

        return CreateLanguageModel(ok=True, message="SUCCESS!", obj=created)


class CreateFieldset(graphene.Mutation):
class Arguments:
name = graphene.String(required=True)
Expand Down Expand Up @@ -1534,10 +1508,10 @@ class Arguments:
output_type = graphene.String(required=False)
limit_to_label = graphene.String(required=False)
instructions = graphene.String(required=False)
language_model_id = graphene.ID(required=False)
agentic = graphene.Boolean(required=False)
extract_is_list = graphene.Boolean(required=False)
must_contain_text = graphene.String(required=False)
task_name = graphene.String(required=False)

ok = graphene.Boolean()
message = graphene.String()
Expand All @@ -1556,9 +1530,9 @@ def mutate(
limit_to_label=None,
instructions=None,
agentic=None,
task_name=None,
extract_is_list=None,
language_model_id=None,
fieldset_id=None,
must_contain_text=None,
):

Expand All @@ -1570,8 +1544,8 @@ def mutate(
pk = from_global_id(id)[1]
obj = Column.objects.get(pk=pk, creator=info.context.user)

if fieldset_id is not None:
obj.fieldset_id = from_global_id(fieldset_id)[1]
if task_name is not None:
obj.task_name = task_name

if language_model_id is not None:
obj.language_model_id = from_global_id(language_model_id)[1]
Expand Down Expand Up @@ -1621,11 +1595,11 @@ class Arguments:
output_type = graphene.String(required=True)
limit_to_label = graphene.String(required=False)
instructions = graphene.String(required=False)
language_model_id = graphene.ID(required=True)
agentic = graphene.Boolean(required=False)
extract_is_list = graphene.Boolean(required=False)
must_contain_text = graphene.String(required=False)
name = graphene.String(required=True)
task_name = graphene.String(required=False)

ok = graphene.Boolean()
message = graphene.String()
Expand All @@ -1639,7 +1613,7 @@ def mutate(
name,
fieldset_id,
output_type,
language_model_id,
task_name=None,
agentic=None,
extract_is_list=None,
must_contain_text=None,
Expand All @@ -1652,9 +1626,6 @@ def mutate(
raise ValueError("One of `query` or `match_text` must be provided.")

fieldset = Fieldset.objects.get(pk=from_global_id(fieldset_id)[1])
language_model = LanguageModel.objects.get(
pk=from_global_id(language_model_id)[1]
)
column = Column(
name=name,
fieldset=fieldset,
Expand All @@ -1663,8 +1634,8 @@ def mutate(
output_type=output_type,
limit_to_label=limit_to_label,
instructions=instructions,
language_model=language_model,
must_contain_text=must_contain_text,
**({"task_name": task_name} if task_name is not None else {}),
agentic=agentic if agentic is not None else False,
extract_is_list=extract_is_list if extract_is_list is not None else False,
creator=info.context.user,
Expand Down Expand Up @@ -1987,7 +1958,6 @@ class Mutation(graphene.ObjectType):
ask_query = StartQueryForCorpus.Field()

# EXTRACT MUTATIONS ##########################################################
create_language_model = CreateLanguageModel.Field()
create_fieldset = CreateFieldset.Field()

create_column = CreateColumn.Field()
Expand Down
83 changes: 45 additions & 38 deletions config/graphql/queries.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
import inspect
import logging
import re

import graphene
from django.conf import settings
from django.db.models import Q
from graphene import relay
from graphene.types.generic import GenericScalar
from graphene_django.fields import DjangoConnectionField
from graphene_django.filter import DjangoFilterConnectionField
from graphql_jwt.decorators import login_required
Expand All @@ -27,7 +29,6 @@
GremlinEngineFilter,
LabelFilter,
LabelsetFilter,
LanguageModelFilter,
RelationshipFilter,
)
from config.graphql.graphene_types import (
Expand All @@ -45,7 +46,6 @@
FieldsetType,
GremlinEngineType_READ,
LabelSetType,
LanguageModelType,
PageAwareAnnotationType,
PdfPageInfoType,
RelationshipType,
Expand All @@ -61,13 +61,7 @@
)
from opencontractserver.corpuses.models import Corpus, CorpusQuery
from opencontractserver.documents.models import Document
from opencontractserver.extracts.models import (
Column,
Datacell,
Extract,
Fieldset,
LanguageModel,
)
from opencontractserver.extracts.models import Column, Datacell, Extract, Fieldset
from opencontractserver.shared.resolvers import resolve_oc_model_queryset
from opencontractserver.types.enums import LabelType
from opencontractserver.users.models import Assignment, UserExport, UserImport
Expand Down Expand Up @@ -638,35 +632,6 @@ def resolve_analyses(self, info, **kwargs):
Q(creator=info.context.user) | Q(is_public=True)
)

language_model = relay.Node.Field(LanguageModelType)

@login_required
def resolve_language_model(self, info, **kwargs):
    """Fetch a single LanguageModel by its global ID, scoped to the requester.

    Superusers may fetch any record; anonymous users only public records;
    everyone else records they created or that are public.
    """
    # Decode the Relay global ID into the Django primary key.
    django_pk = from_global_id(kwargs.get("id"))[1]
    requester = info.context.user

    # NOTE(review): @login_required presumably rejects anonymous users before
    # this body runs, which would make the anonymous branch unreachable —
    # confirm before relying on it.
    if requester.is_superuser:
        lookup = Q(id=django_pk)
    elif requester.is_anonymous:
        lookup = Q(id=django_pk) & Q(is_public=True)
    else:
        lookup = Q(id=django_pk) & (Q(creator=requester) | Q(is_public=True))

    return LanguageModel.objects.get(lookup)

language_models = DjangoFilterConnectionField(
    LanguageModelType, filterset_class=LanguageModelFilter
)

@login_required
def resolve_language_models(self, info, **kwargs):
    """Return the LanguageModel queryset the requesting user may see.

    Superusers get every record; anonymous users only public records;
    everyone else records they created or that are public.
    """
    requester = info.context.user

    if requester.is_superuser:
        return LanguageModel.objects.all()

    # NOTE(review): @login_required presumably rejects anonymous users before
    # this body runs, which would make the anonymous case unreachable —
    # confirm before relying on it.
    if requester.is_anonymous:
        visible = Q(is_public=True)
    else:
        visible = Q(creator=requester) | Q(is_public=True)

    return LanguageModel.objects.filter(visible)

fieldset = relay.Node.Field(FieldsetType)

@login_required
Expand Down Expand Up @@ -811,3 +776,45 @@ def resolve_datacells(self, info, **kwargs):
return Datacell.objects.filter(
Q(extract__creator=info.context.user) | Q(is_public=True)
)

registered_extract_tasks = graphene.Field(GenericScalar)

@login_required
def resolve_registered_extract_tasks(self, info, **kwargs):
    """Return the registered data-extract Celery tasks and their docstrings.

    Only tasks whose registered name starts with
    ``opencontractserver.tasks.data_extract_tasks`` are reported.

    Returns:
        dict mapping each matching task name to the docstring of its ``run``
        callable, or to ``"No docstring available"`` when there is none.
        Empty if the task registry cannot be read from the app instance.
    """
    # Imported locally, as in the original code, rather than at module level.
    from config import celery_app

    extract_task_prefix = "opencontractserver.tasks.data_extract_tasks"

    try:
        # Snapshot the registry so the loop below iterates a stable list.
        registered = list(celery_app.tasks.items())
    except AttributeError as e:
        logger.warning(f"Couldn't get tasks from app instance: {str(e)}")
        return {}

    # NOTE: tasks are read from the app instance only. Asking the workers via
    # celery_app.control.inspect() was considered but left out because it is
    # much higher latency and did not appear necessary.
    tasks = {}
    for task_name, task in registered:
        # Filter first, so unrelated tasks are never introspected and a
        # malformed one cannot prevent the extract tasks from being listed.
        if not task_name.startswith(extract_task_prefix):
            continue

        run = getattr(task, "run", None)
        docstring = inspect.getdoc(run) if run is not None else None
        tasks[task_name] = docstring or "No docstring available"

    return tasks
Binary file added docs/assets/images/DataGrid.png/img.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added docs/assets/images/screenshots/Datagrid.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added docs/assets/images/screenshots/Edit_Column.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading

0 comments on commit a349ea7

Please sign in to comment.