# Source code for sanskrit_data.schema.ullekhanam

# -*- coding: utf-8 -*-

"""
-  Annotations are stored in a directed acyclic graph, for example: a book portion having a TextAnnotation having PadaAnnotations having SamaasaAnnotations.

    -  Some Annotations (eg. SandhiAnnotation, TextAnnotation) can
       have multiple "targets" (ie. other objects being annotated).
    -  Rather than a simple tree, we end up with a Directed Acyclic
       Graph (DAG) of Annotation objects.

-  JSON schema mindmap (updated as needed). The link target is truncated
   in this copy of the file; the surviving fragment is
   ``,%22action%22:%22open%22,%22userId%22:%22109000762913288837175%22%7D``.
-  For general context and class diagram, refer to :mod:`~sanskrit_data.schema`.
"""

import logging
import sys

from sanskrit_data.schema import common
from sanskrit_data.schema.books import BookPortion
from sanskrit_data.schema.common import JsonObject, JsonObjectWithTarget, Target, ScriptRendering, Text

  format="%(levelname)s: %(asctime)s {%(filename)s:%(lineno)d}: %(message)s "

class AnnotationSource(JsonObject):
  """Describes where an annotation came from.

  The schema requires ``source_type`` (``"system_inferred"`` or
  ``"user_supplied"``) and optionally accepts a string ``id``.
  """
  # NOTE(review): unlike the other schemas in this module, common.TYPE_FIELD is
  # declared at the top level here rather than inside "properties"; left as-is.
  schema = common.recursively_merge(JsonObject.schema, ({
    "type": "object",
    "description": "Source of the annotation which contains this node.",
    common.TYPE_FIELD: {
      "enum": ["AnnotationSource"]
    },
    "properties": {
      "source_type": {
        "type": "string",
        "enum": ["system_inferred", "user_supplied"],
        "description": "Does this annotation come from a machine, or a human? source_ prefix avoids keyword conflicts in some languages.",
      },
      "id": {
        "type": "string",
        "description": "Something to identify the particular annotation source.",
      }
    },
    "required": ["source_type"]
  }))

  # noinspection PyShadowingBuiltins
  @classmethod
  def from_details(cls, source_type, id):
    """Build an AnnotationSource and run ``validate_schema()`` on it.

    :param source_type: Stored as ``source_type``.
    :param id: Stored as ``id``.
    :return: The new AnnotationSource.
    """
    source = AnnotationSource()
    # The two fields are assigned separately: a chained
    # ``source.source_type = source_type = id`` would store the id as the
    # source type and leave ``source.id`` unset.
    source.source_type = source_type
    source.id = id
    source.validate_schema()
    return source
class Annotation(JsonObjectWithTarget):
  """Base class for annotations: an object with ``targets`` and a ``source``.

  Both ``targets`` and ``source`` are required by the schema.
  """
  # NOTE(review): "minLength" is a string keyword in JSON Schema and has no
  # effect on an array; "minItems" was probably intended. Left unchanged here
  # because switching would tighten validation for existing data.
  schema = common.recursively_merge(JsonObjectWithTarget.schema, ({
    "type": "object",
    "properties": {
      common.TYPE_FIELD: {
        "enum": ["Annotation"]
      },
      "source": AnnotationSource.schema,
      "targets": {
        "type": "array",
        "description": "The entity being annotated.",
        "minLength": 1,
        "items": Target.schema
      }
    },
    "required": ["targets", "source"]
  }))

  def validate(self, db_interface=None):
    """Validate this annotation.

    When ``self.source.source_type`` contains ``"user"``, the current flask
    session's ``user`` entry is loaded and its first user id is recorded on
    the source before delegating to the parent ``validate``.

    :param db_interface: Passed through to the parent ``validate``.
    """
    if "user" in self.source.source_type:
      # Imported lazily so that flask is only needed for user-supplied annotations.
      from flask import session
      user = JsonObject.make_from_dict(session.get('user', None))
      # NOTE(review): the assignment target was lost in this copy of the file
      # (only "= user.get_user_ids()[0]" survived); ``self.source.id`` is the
      # reconstruction -- confirm against the upstream source.
      self.source.id = user.get_user_ids()[0]
    super(Annotation, self).validate(db_interface=db_interface)

  @classmethod
  def get_allowed_target_classes(cls):
    """Return the classes an Annotation may target: BookPortion and Annotation."""
    return [BookPortion, Annotation]

  def set_base_details(self, targets, source):
    """Store ``targets`` and ``source`` on this annotation.

    :param targets: Stored as ``self.targets``.
    :param source: Stored as ``self.source``.
    """
    # noinspection PyAttributeOutsideInit
    self.targets = targets
    # noinspection PyAttributeOutsideInit
    self.source = source
class Rectangle(JsonObject):
  """A rectangle within an image, stored as ``x1``, ``y1``, ``w`` and ``h``.

  Equality is overlap-based: two rectangles compare equal when they overlap
  with positive width and height.
  """
  schema = common.recursively_merge(JsonObject.schema, ({
    "type": "object",
    "description": "A rectangle within an image.",
    "properties": {
      common.TYPE_FIELD: {
        "enum": ["Rectangle"]
      },
      "x1": {
        "type": "integer"
      },
      "y1": {
        "type": "integer"
      },
      "w": {
        "type": "integer"
      },
      "h": {
        "type": "integer"
      },
    },
    "required": ["x1", "y1", "w", "h"]
  }))

  @classmethod
  def from_details(cls, x=-1, y=-1, w=-1, h=-1, score=0.0):
    """Build a Rectangle, call ``validate()`` on it and return it.

    :param x: Stored as ``x1``.
    :param y: Stored as ``y1``.
    :param w: Stored as ``w``.
    :param h: Stored as ``h``.
    :param score: Stored as ``score`` (not declared in the schema above).
    :return: The new Rectangle.
    """
    rectangle = Rectangle()
    rectangle.x1 = x
    rectangle.y1 = y
    rectangle.w = w
    rectangle.h = h
    rectangle.score = score
    rectangle.validate()
    return rectangle

  # Two segments are 'equal' if they overlap.
  def __eq__(self, other):
    """Return True when ``self`` and ``other`` overlap with positive area."""
    if not isinstance(other, Rectangle):
      # Let Python fall back to its default handling instead of failing on
      # missing attributes.
      return NotImplemented
    # The coordinates live in x1/y1 (see schema and from_details); the
    # attributes x/y are never set anywhere in this class.
    xmax = max(self.x1, other.x1)
    ymax = max(self.y1, other.y1)
    overlap_w = min(self.x1 + self.w, other.x1 + other.w) - xmax
    overlap_h = min(self.y1 + self.h, other.y1 + other.h) - ymax
    return overlap_w > 0 and overlap_h > 0

  def __ne__(self, other):
    """Return the negation of ``__eq__``, propagating NotImplemented."""
    result = self.__eq__(other)
    if result is NotImplemented:
      return result
    return not result

  def __cmp__(self, other):
    """Order rectangles by ``y1`` then ``x1``; overlapping rectangles compare as 0.

    NOTE: ``__cmp__`` is only consulted by Python 2; Python 3 ignores it.
    """
    if self == other:
      # NOTE(review): the start of this statement was lost in this copy of the
      # file (only '+ " overlaps " + str(other))' survived); ``logging.info``
      # is the reconstruction -- confirm against the upstream source.
      logging.info(str(self) + " overlaps " + str(other))
      return 0
    elif (self.y1 < other.y1) or ((self.y1 == other.y1) and (self.x1 < other.x1)):
      return -1
    else:
      return 1
# noinspection PyMethodOverriding
class ImageTarget(Target):
  """A Target that additionally carries a ``rectangle`` (required by the schema)."""
  schema = common.recursively_merge(
    Target.schema,
    {
      "type": "object",
      "description": "The rectangle within the image being targetted.",
      "properties": {
        common.TYPE_FIELD: {"enum": ["ImageTarget"]},
        "rectangle": Rectangle.schema
      },
      "required": ["rectangle"]
    })

  # TODO use w, h instead.
  # noinspection PyMethodOverriding
  @classmethod
  def from_details(cls, container_id, rectangle):
    """Create an ImageTarget, call ``validate()`` on it and return it.

    :param container_id: Stored as ``container_id``.
    :param rectangle: Stored as ``rectangle``.
    :return: The new ImageTarget.
    """
    image_target = ImageTarget()
    image_target.container_id = container_id
    image_target.rectangle = rectangle
    image_target.validate()
    return image_target
class ImageAnnotation(Annotation):
  """Mark a certain fragment of an image.

  The ``targets`` array holds ImageTarget items.
  """
  schema = common.recursively_merge(
    Annotation.schema,
    {
      "type": "object",
      "description": "A rectangle within an image, picked by a particular annotation source.",
      "properties": {
        common.TYPE_FIELD: {"enum": ["ImageAnnotation"]},
        "targets": {
          "type": "array",
          "items": ImageTarget.schema
        }
      },
    })

  target_class = ImageTarget

  @classmethod
  def get_allowed_target_classes(cls):
    """Return the classes an ImageAnnotation may target."""
    return [BookPortion, ImageAnnotation]

  @classmethod
  def from_details(cls, targets, source):
    """Create an ImageAnnotation, call ``validate()`` on it and return it.

    :param targets: Passed to ``set_base_details``.
    :param source: Passed to ``set_base_details``.
    :return: The new ImageAnnotation.
    """
    image_annotation = ImageAnnotation()
    image_annotation.set_base_details(targets, source)
    image_annotation.validate()
    return image_annotation
# Targets: ImageAnnotation(s) or TextAnnotation or BookPortion
class TextAnnotation(Annotation):
  """An annotation carrying text ``content`` (required by the schema)."""
  schema = common.recursively_merge(
    Annotation.schema,
    {
      "type": "object",
      "description": "Annotation of some (sub)text from within the object (image or another text) being annotated.",
      "properties": {
        common.TYPE_FIELD: {"enum": ["TextAnnotation"]},
        "content": Text.schema,
      },
      "required": ["content"]
    })

  @classmethod
  def get_allowed_target_classes(cls):
    """Return the classes a TextAnnotation may target."""
    return [BookPortion, ImageAnnotation]

  @classmethod
  def from_details(cls, targets, source, content):
    """Create a TextAnnotation, call ``validate()`` on it and return it.

    :param targets: Passed to ``set_base_details``.
    :param source: Passed to ``set_base_details``.
    :param content: Stored as ``content``.
    :return: The new TextAnnotation.
    """
    text_annotation = TextAnnotation()
    text_annotation.set_base_details(targets, source)
    text_annotation.content = content
    text_annotation.validate()
    return text_annotation
class CommentAnnotation(TextAnnotation):
  """A TextAnnotation used as a comment; may target BookPortion or any Annotation."""
  schema = common.recursively_merge(
    TextAnnotation.schema,
    {
      "description": "A comment that can be associated with nearly any Annotation or BookPortion.",
    })

  @classmethod
  def get_allowed_target_classes(cls):
    """Return the classes a CommentAnnotation may target."""
    return [BookPortion, Annotation]
class TextOffsetAddress(JsonObject):
  """Specifies a substring through integer ``start`` and ``end`` fields."""
  schema = common.recursively_merge(
    JsonObject.schema,
    {
      "type": "object",
      "description": "A way to specify a substring.",
      "properties": {
        common.TYPE_FIELD: {"enum": ["TextOffsetAddress"]},
        "start": {"type": "integer"},
        "end": {"type": "integer"}
      }
    })

  @classmethod
  def from_details(cls, start, end):
    """Create a TextOffsetAddress, call ``validate()`` on it and return it.

    :param start: Stored as ``start``.
    :param end: Stored as ``end``.
    :return: The new TextOffsetAddress.
    """
    address = TextOffsetAddress()
    address.start = start
    address.end = end
    address.validate()
    return address
class TextTarget(Target):
  """A Target that can narrow down to part of a string.

  The part is identified by an optional ``shabda_id`` and/or an optional
  ``offset_address``.
  """
  schema = common.recursively_merge(
    Target.schema,
    {
      "type": "object",
      "description": "A way to specify a particular substring within a string.",
      "properties": {
        common.TYPE_FIELD: {"enum": ["TextTarget"]},
        "shabda_id": {
          "type": "string",
          "description": "Format: pada_index.shabda_index or just pada_index."
                         "Suppose that some shabda in 'rāgādirogān satatānuṣaktān' is being targetted. "
                         "This has the following pada-vigraha: rāga [comp.]-ādi [comp.]-roga [ac.p.m.] satata [comp.]-anuṣañj [ac.p.m.]."
                         "Then, rāga has the id 1.1. roga has id 1.3. satata has the id 2.1."
        },
        "offset_address": TextOffsetAddress.schema
      },
    })

  @classmethod
  def from_details(cls, container_id, shabda_id=None, offset_address=None):
    """Create a TextTarget, call ``validate()`` on it and return it.

    :param container_id: Stored as ``container_id``.
    :param shabda_id: Stored as ``shabda_id`` only when not None.
    :param offset_address: Stored as ``offset_address`` only when not None.
    :return: The new TextTarget.
    """
    text_target = TextTarget()
    text_target.container_id = container_id
    # Optional fields are left unset (rather than set to None) when omitted.
    if shabda_id is not None:
      text_target.shabda_id = shabda_id
    if offset_address is not None:
      text_target.offset_address = offset_address
    text_target.validate()
    return text_target
# noinspection PyMethodOverriding
class PadaAnnotation(Annotation):
  """A grammatical pada (subanta or tiNanta) with a ``word`` and a ``root``.

  The ``targets`` array holds TextTarget items.
  """
  schema = common.recursively_merge(Annotation.schema, ({
    "type": "object",
    "description": "A grammatical pada - subanta or tiNanta.",
    "properties": {
      common.TYPE_FIELD: {
        "enum": ["PadaAnnotation"]
      },
      "targets": {
        "type": "array",
        "items": TextTarget.schema
      },
      # The Text schema is used directly as the property schema (as
      # TextAnnotation does for "content"); nesting it under "type" is not a
      # valid use of the JSON Schema "type" keyword.
      "word": Text.schema,
      "root": Text.schema
    },
  }))
  target_class = TextTarget

  @classmethod
  def get_allowed_target_classes(cls):
    """Return the classes a PadaAnnotation may target."""
    return [BookPortion, TextAnnotation]

  # noinspection PyMethodOverriding
  def set_base_details(self, targets, source, word, root):
    """Store targets and source via the parent, then ``word`` and ``root``.

    :param targets: Passed to ``Annotation.set_base_details``.
    :param source: Passed to ``Annotation.set_base_details``.
    :param word: Stored as ``self.word``.
    :param root: Stored as ``self.root``.
    """
    super(PadaAnnotation, self).set_base_details(targets, source)
    # noinspection PyAttributeOutsideInit
    self.word = word
    # noinspection PyAttributeOutsideInit
    self.root = root

  @classmethod
  def from_details(cls, targets, source, word, root):
    """Create a PadaAnnotation, call ``validate()`` on it and return it.

    :param targets: Passed to ``set_base_details``.
    :param source: Passed to ``set_base_details``.
    :param word: Passed to ``set_base_details``.
    :param root: Passed to ``set_base_details``.
    :return: The new PadaAnnotation.
    """
    annotation = PadaAnnotation()
    annotation.set_base_details(targets, source, word, root)
    annotation.validate()
    return annotation
# Targets: TextTarget pointing to TextAnnotation # noinspection PyMethodOverriding
class SubantaAnnotation(PadaAnnotation):
  """A PadaAnnotation with the optional fields ``linga``, ``vibhakti`` and ``vachana``."""
  schema = common.recursively_merge(
    PadaAnnotation.schema,
    {
      "type": "object",
      "description": "Anything ending with a sup affix. Includes avyaya-s.",
      "properties": {
        common.TYPE_FIELD: {"enum": ["SubantaAnnotation"]},
        "linga": {
          "type": "string",
          "enum": ["strii", "pum", "napum", "avyaya"]
        },
        "vibhakti": {
          "type": "string",
          "enum": ["1", "2", "3", "4", "5", "6", "7", "1.sambodhana"]
        },
        "vachana": {
          "type": "integer",
          "enum": [1, 2, 3]
        }
      },
    })

  @classmethod
  def from_details(cls, targets, source, word, root, linga, vibhakti, vachana):
    """Create a SubantaAnnotation, call ``validate()`` on it and return it.

    :param targets: Passed to ``set_base_details``.
    :param source: Passed to ``set_base_details``.
    :param word: Passed to ``set_base_details``.
    :param root: Passed to ``set_base_details``.
    :param linga: Stored as ``linga``.
    :param vibhakti: Stored as ``vibhakti``.
    :param vachana: Stored as ``vachana``.
    :return: The new SubantaAnnotation.
    """
    subanta = SubantaAnnotation()
    subanta.set_base_details(targets, source, word, root)
    subanta.linga = linga
    subanta.vibhakti = vibhakti
    subanta.vachana = vachana
    subanta.validate()
    return subanta
# noinspection PyMethodOverriding,PyPep8Naming
class TinantaAnnotation(PadaAnnotation):
  """A PadaAnnotation with the optional fields ``lakAra``, ``puruSha`` and ``vachana``."""
  schema = common.recursively_merge(
    PadaAnnotation.schema,
    {
      "type": "object",
      "description": "Anything ending with a tiN affix.",
      "properties": {
        common.TYPE_FIELD: {"enum": ["TinantaAnnotation"]},
        "lakAra": {
          "type": "string",
          "enum": ["laT", "laN", "vidhi-liN", "AshIr-liN", "loT", "liT", "luT", "LT", "luN", "LN", "leT"]
        },
        "puruSha": {
          "type": "string",
          "enum": ["prathama", "madhyama", "uttama"]
        },
        "vachana": {
          "type": "integer",
          "enum": [1, 2, 3]
        }
      },
    })

  # noinspection PyPep8Naming
  @classmethod
  def from_details(cls, targets, source, word, root, lakAra, puruSha, vachana):
    """Create a TinantaAnnotation, call ``validate()`` on it and return it.

    :param targets: Passed to ``set_base_details``.
    :param source: Passed to ``set_base_details``.
    :param word: Passed to ``set_base_details``.
    :param root: Passed to ``set_base_details``.
    :param lakAra: Stored as ``lakAra``.
    :param puruSha: Stored as ``puruSha``.
    :param vachana: Stored as ``vachana``.
    :return: The new TinantaAnnotation.
    """
    tinanta = TinantaAnnotation()
    tinanta.set_base_details(targets, source, word, root)
    tinanta.lakAra = lakAra
    tinanta.puruSha = puruSha
    tinanta.vachana = vachana
    tinanta.validate()
    return tinanta
# Targets: a pair of textAnnotation or BookPortion objects
class TextSambandhaAnnotation(Annotation):
  """A directional connection between two text portions.

  ``targets`` holds the pair of texts (source text first, target text
  second). ``source_text_padas`` and ``target_text_padas`` hold targets that
  ``validate`` checks against PadaAnnotation.
  """
  # No extra "required" list is declared here. The previous
  # "required": ["combined_string"] named a property this schema never
  # defines, so it could not be satisfied by the documented fields.
  schema = common.recursively_merge(Annotation.schema, ({
    "type": "object",
    "description": "Describes connection between two text portions. Such connection is directional (ie it connects words in a source sentence to words in a target sentence.)",
    "properties": {
      common.TYPE_FIELD: {
        "enum": ["TextSambandhaAnnotation"]
      },
      "targets": {
        "description": "A pair of texts being connected. First text is the 'source text', second is the 'target text'",
      },
      "category": {
        "type": "string"
      },
      "source_text_padas": {
        "type": "array",
        "description": "The entity being annotated.",
        "items": Target.schema
      },
      "target_text_padas": {
        "type": "array",
        "description": "The entity being annotated.",
        "items": Target.schema
      }
    },
  }))

  def validate(self, db_interface=None):
    """Run the parent validation, then validate both pada lists.

    :param db_interface: Passed through to the parent ``validate`` and to
      ``validate_targets``.
    """
    super(TextSambandhaAnnotation, self).validate(db_interface=db_interface)
    self.validate_targets(targets=self.source_text_padas, allowed_types=[PadaAnnotation], db_interface=db_interface)
    self.validate_targets(targets=self.target_text_padas, allowed_types=[PadaAnnotation], db_interface=db_interface)

  @classmethod
  def get_allowed_target_classes(cls):
    """Return the classes a TextSambandhaAnnotation may target."""
    return [BookPortion, TextAnnotation]
# Targets: two or more PadaAnnotations
class SandhiAnnotation(Annotation):
  """An annotation with a required ``combined_string`` and an optional ``sandhi_type``.

  It may only target PadaAnnotation objects.
  """
  schema = common.recursively_merge(Annotation.schema, ({
    "type": "object",
    "properties": {
      common.TYPE_FIELD: {
        "enum": ["SandhiAnnotation"]
      },
      # The Text schema is used directly as the property schema (as
      # TextAnnotation does for "content"); nesting it under "type" is not a
      # valid use of the JSON Schema "type" keyword.
      "combined_string": Text.schema,
      "sandhi_type": {
        "type": "string"
      }
    },
    "required": ["combined_string"]
  }))

  @classmethod
  def get_allowed_target_classes(cls):
    """Return the classes a SandhiAnnotation may target."""
    return [PadaAnnotation]

  @classmethod
  def from_details(cls, targets, source, combined_string, sandhi_type="UNK"):
    """Create a SandhiAnnotation, call ``validate()`` on it and return it.

    :param targets: Passed to ``set_base_details``.
    :param source: Passed to ``set_base_details``.
    :param combined_string: Stored as ``combined_string``.
    :param sandhi_type: Stored as ``sandhi_type``; defaults to ``"UNK"``.
    :return: The new SandhiAnnotation.
    """
    annotation = SandhiAnnotation()
    annotation.set_base_details(targets, source)
    annotation.combined_string = combined_string
    annotation.sandhi_type = sandhi_type
    annotation.validate()
    return annotation
# Targets: one PadaAnnotation (the samasta-pada)
class SamaasaAnnotation(Annotation):
  """An annotation on a PadaAnnotation that lists ``component_padas`` and a ``type``.

  ``validate`` checks ``component_padas`` against PadaAnnotation.
  """
  # Merged from Annotation.schema, matching the base class. Merging from
  # Target.schema dropped the inherited "targets"/"source" requirements.
  schema = common.recursively_merge(Annotation.schema, ({
    "type": "object",
    "properties": {
      common.TYPE_FIELD: {
        "enum": ["SamaasaAnnotation"]
      },
      "component_padas": {
        "type": "array",
        "description": "Pointers to PadaAnnotation objects corresponding to components of the samasta-pada",
        "items": Target.schema
      },
      "type": {
        "type": "string"
      }
    },
  }))

  @classmethod
  def get_allowed_target_classes(cls):
    """Return the classes a SamaasaAnnotation may target."""
    return [PadaAnnotation]

  def validate(self, db_interface=None):
    """Run the parent validation, then validate ``component_padas``.

    :param db_interface: Passed through to the parent ``validate`` and to
      ``validate_targets``.
    """
    super(SamaasaAnnotation, self).validate(db_interface=db_interface)
    self.validate_targets(targets=self.component_padas, allowed_types=[PadaAnnotation], db_interface=db_interface)

  @classmethod
  def from_details(cls, targets, source, combined_string, samaasa_type="UNK"):
    """Create a SamaasaAnnotation, call ``validate()`` on it and return it.

    NOTE(review): this sets ``combined_string`` (not declared in the schema)
    and never sets ``component_padas``, which ``validate`` reads. The
    signature is kept for callers; confirm the intended fields.

    :param targets: Passed to ``set_base_details``.
    :param source: Passed to ``set_base_details``.
    :param combined_string: Stored as ``combined_string``.
    :param samaasa_type: Stored as ``type``; defaults to ``"UNK"``.
    :return: The new SamaasaAnnotation.
    """
    annotation = SamaasaAnnotation()
    annotation.set_base_details(targets, source)
    annotation.combined_string = combined_string
    annotation.type = samaasa_type
    annotation.validate()
    return annotation
# Register this module with the shared JSON class index; this is essential
# for depickling to work.
common.update_json_class_index(sys.modules[__name__])
logging.debug(common.json_class_index)