%{ /* -*- Mode: C++ -*- */
/* --extra-options-gcc: -Wno-unused -Wno-undef */

/*
 * NOTE(review): the header name of the original include directive was lost
 * (the line read just "#include").  <stdarg.h> (va_list in the NDEBUG
 * constructor) and <stddef.h> (NULL) are what this prologue itself needs.
 * Symbol, return_if_fail() and return0_if_out_of_bounds() are not declared
 * anywhere in this file and must come from a project header -- confirm which
 * one belongs here.
 */
#include <stdarg.h>
#include <stddef.h>

/*
 * Bit layout of a SynNodeType value, as defined by the masks below:
 *
 *   0x0007f  number of child nodes
 *   0x00080  set if the node carries a Symbol
 *   0x00f00  minor type
 *   0x0f000  sub-type type (SynSubTypeType)
 *   0x0ff00  main type (= sub-type type + minor type)
 *   0xf0000  sub type
 */
#define CHILD_CNT(X)     ((X) & 0x0007f)
#define WITH_SYMBOL(X)   ((X) & 0x00080)
#define MAIN_TYPE(X)     ((X) & 0x0ff00)
#define MINOR_TYPE(X)    ((X) & 0x00f00)
#define SUB_TYPE(X)      ((X) & 0xf0000)
#define SUB_TYPE_TYPE(X) ((X) & 0x0f000)

/* Values of the SUB_TYPE_TYPE() field that have a sub type. */
enum SynSubTypeType {
    SYN_ORIENTATION = 0x07000,
    SYN_ABSOLUTE    = 0x08000,
};

/* SUB_TYPE() values for SYN_ORIENTATION nodes. */
enum SynSubTypeOrientation {
    SYN_ORI_LEFT  = 0x00000,
    SYN_ORI_RIGHT = 0x10000
};

/* MINOR_TYPE() values for SYN_ORIENTATION nodes. */
enum SynMinorTypeOrientation {
    SYN_ORI_LIST   = 0x00000,
    SYN_ORI_COORD  = 0x00100,
    SYN_ORI_SUBORD = 0x00200,
};

/* SUB_TYPE() values for SYN_ABSOLUTE nodes. */
enum SynSubTypeAbsolute {
    SYN_ABS_CONSTR = 0x00000,
    SYN_ABS_NOUN   = 0x10000,
    SYN_ABS_VERB   = 0x20000
};

/* MINOR_TYPE() values for SYN_ABSOLUTE nodes. */
enum SynMinorTypeAbsolute {
    SYN_ABS_SIMPLE = 0x00000,
    SYN_ABS_EHRC   = 0x00100,
    SYN_ABS_IHRC   = 0x00200,
    SYN_ABS_CLAUSE = 0x00300,
    SYN_ABS_LEX    = 0x00400,
    SYN_ABS_SUFFIX = 0x00500,
    SYN_ABS_DERIV  = 0x00600,
};

/*
 * All node types.  The leading comment on each row is the main type (bits
 * 8..15); the columns are the sub types 0, 1 and 2.
 *
 * NOTE(review): assuming token values are Symbols and nonterminal values are
 * Syn pointers (as they are used for P_SUFFIX / SYN_SUFFIX), the low byte of
 * the following entries does not match what the grammar actions pass to the
 * Syn constructor:
 *
 *   SYN_ADVERB_MOOD_FULL  declares 3 children;  built from 2 children
 *   SYN_CONSTR            declares symbol only; built from 1 child
 *   SYN_NOUN_MOOD         declares 2 children;  built from symbol + 1 child
 *   SYN_*_LEX             declare 1 child;      built from a symbol only
 *   SYN_*_DERIV           declare 2 children;   built from symbol + 1 child
 *
 * The values are left untouched here; either the enum or the grammar needs
 * to be adjusted -- confirm the intended shape with the author.
 */
enum SynNodeType {
    /*        sub type:    0                             1                             2 */
    /*00*/

    /* No Subtype: */
    /*01*/ SYN_SUFFIX           = 0x00180,
    /*02*/ SYN_SUFFIX_MOD1      = 0x00281,
    /*03*/
    /*04*/ SYN_ADVERB_MOOD_GAP  = 0x00481,
    /*05*/ SYN_ADVERB_MOOD_FULL = 0x00503,
    /*06*/ SYN_ADVERB_LEX       = 0x00680,
    /*07*/ SYN_ADVERB_GAP       = 0x00701,
    /*08*/ SYN_ADVERB_FULL      = 0x00802,
    /*09*/ SYN_SENTENCE         = 0x00981,
    /*0a*/ SYN_CLAUSE           = 0x00a02,
    /*0b*/ SYN_CONSTR           = 0x00b80,
    /*0c*/ SYN_NOUN_MOOD        = 0x00c02,
    /*0d*/
    /*0e*/
    /*0f*/
    /*..*/

    /* SYN_ORIENTATION / SynSubTypeOrientation / SynMinorTypeOrientation : */
    /*70*/ SYN_LEFT_LIST     = 0x07002,  SYN_RIGHT_LIST   = 0x17002,
    /*71*/ SYN_LEFT_COORD    = 0x07182,  SYN_RIGHT_COORD  = 0x17182,
    /*72*/ SYN_LEFT_SUBORD   = 0x07282,  SYN_RIGHT_SUBORD = 0x17282,
    /*..*/

    /* SYN_ABSOLUTE / SynSubTypeAbsolute / SynMinorTypeAbsolute: */
    /*80*/ /*defective*/                 SYN_NOUN_SIMPLE  = 0x18002,  SYN_VERB_SIMPLE = 0x28002,
    /*81*/ /*defective*/                 SYN_NOUN_EHRC    = 0x18103,  SYN_VERB_EHRC   = 0x28103,
    /*82*/ /*defective*/                 SYN_NOUN_IHRC    = 0x18202,  SYN_VERB_IHRC   = 0x28202,
    /*83*/ /*defective*/                 SYN_NOUN_CLAUSE  = 0x18302,  SYN_VERB_CLAUSE = 0x28302,
    /*84*/ SYN_CONSTR_LEX    = 0x08401,  SYN_NOUN_LEX     = 0x18401,  SYN_VERB_LEX    = 0x28401,
    /*85*/ SYN_CONSTR_SUFFIX = 0x08502,  SYN_NOUN_SUFFIX  = 0x18502,  SYN_VERB_SUFFIX = 0x28502,
    /*86*/ SYN_CONSTR_DERIV  = 0x08602,  SYN_NOUN_DERIV   = 0x18602,  SYN_VERB_DERIV  = 0x28602,
};

/*
 * Note: in the presence of a construct case, modifiers modify the substantive
 * in construct case, not the preceding one.  I.e.:
 *
 *     big REL man          == big man
 *     big REL man hand     == man's big hand    (NOT: big man's hand)
 *
 * In fact, "big person's hand" cannot directly be expressed but needs to be
 * periphrased.  E.g. in the following way:
 *
 *     big REL man this-one hand  == 'big man his hand' = big man's hand
 *
 * (-> noun-noun coordination without linking conjunction.)
 *
 * The system is defective, because otherwise, we'd need to allow free construct
 * state words as heads of internally headed relative clauses or after a relative
 * pronoun.  But in fact, we want the construct state to directly follow its
 * possessor.
 */

/*
 * A node of the syntax tree built by the grammar actions.
 *
 * The node type encodes whether the node carries a Symbol (WITH_SYMBOL) and
 * how many children it has (CHILD_CNT).  A node deletes its children in its
 * destructor; child pointers may be NULL.
 */
class Syn {
public:
    typedef SynNodeType NodeType;

private:
    NodeType m_type;
    Symbol   m_symbol;
    Syn    **m_child;

    /*
     * The destructor deletes the children, so a copied node would delete
     * them a second time.  Copying is therefore disabled (declared, never
     * defined).
     */
    Syn (Syn const &);
    Syn &operator= (Syn const &);

    /*
     * Allocates the zero-initialised child array for a node of type n for
     * which `given` children were supplied.  The array always has at least
     * CHILD_CNT(n) slots, because child() and the destructor iterate over
     * child_cnt() entries even if a constructor check failed.  Returns NULL
     * if no slot is needed.
     */
    static Syn **alloc_children (NodeType n, int given)
    {
        int const declared= CHILD_CNT(n);
        int const slots=    declared > given ? declared : given;
        return slots == 0 ? NULL : new Syn*[slots]();
    }

public:
    /* The node type this node was constructed with. */
    NodeType type() const { return m_type; }

    /* The symbol stored in the node. */
    Symbol symbol() const { return m_symbol; }

    /* Number of children, as encoded in the node type. */
    int child_cnt() const { return CHILD_CNT(type()); }

    /*
     * The i-th child.  The bounds check is delegated to
     * return0_if_out_of_bounds(), which is defined outside this file
     * (presumably it makes the function return 0 for i outside
     * [0, child_cnt()) -- confirm).
     */
    Syn *child(int i) const
    {
        return0_if_out_of_bounds (i, child_cnt());
        return m_child[i];
    }

#ifndef NDEBUG
    /*
     * Debug build: one type-checked constructor per argument shape.  Each
     * checks via return_if_fail() (defined outside this file) that the
     * symbol flag and child count encoded in n match the arguments given.
     * Since the child array is zero-initialised, children that are not
     * stored because a check bailed out read back as NULL.
     */

    /* Symbol, no children. */
    Syn (NodeType n, Symbol a):
        m_type (n), m_symbol (a), m_child (alloc_children (n, 0))
    {
        return_if_fail (WITH_SYMBOL(n));
        return_if_fail (CHILD_CNT(n) == 0);
    }

    /* Symbol and one child. */
    Syn (NodeType n, Symbol a, Syn *b):
        m_type (n), m_symbol (a), m_child (alloc_children (n, 1))
    {
        return_if_fail (WITH_SYMBOL(n));
        return_if_fail (CHILD_CNT(n) == 1);
        m_child[0]= b;
    }

    /* Symbol and two children. */
    Syn (NodeType n, Symbol a, Syn *b, Syn *c):
        m_type (n), m_symbol (a), m_child (alloc_children (n, 2))
    {
        return_if_fail (WITH_SYMBOL(n));
        return_if_fail (CHILD_CNT(n) == 2);
        m_child[0]= b;
        m_child[1]= c;
    }

    /* No symbol, one child. */
    Syn (NodeType n, Syn *a):
        m_type (n), m_symbol (NULL), m_child (alloc_children (n, 1))
    {
        return_if_fail (!WITH_SYMBOL(n));
        return_if_fail (CHILD_CNT(n) == 1);
        m_child[0]= a;
    }

    /* No symbol, two children. */
    Syn (NodeType n, Syn *a, Syn *b):
        m_type (n), m_symbol (NULL), m_child (alloc_children (n, 2))
    {
        return_if_fail (!WITH_SYMBOL(n));
        return_if_fail (CHILD_CNT(n) == 2);
        m_child[0]= a;
        m_child[1]= b;
    }

    /* No symbol, three children. */
    Syn (NodeType n, Syn *a, Syn *b, Syn *c):
        m_type (n), m_symbol (NULL), m_child (alloc_children (n, 3))
    {
        return_if_fail (!WITH_SYMBOL(n));
        return_if_fail (CHILD_CNT(n) == 3);
        m_child[0]= a;
        m_child[1]= b;
        m_child[2]= c;
    }
#else
    /*
     * Release build: a single unchecked variadic constructor.  The variable
     * arguments must be exactly what n encodes: first a Symbol if
     * WITH_SYMBOL(n), then CHILD_CNT(n) arguments of type Syn*.  A null
     * child must be passed as a null pointer of type Syn*, since it is read
     * back with va_arg (va, Syn*).
     */
    Syn (NodeType n, ...):
        m_type (n), m_symbol (NULL), m_child (alloc_children (n, 0))
    {
        va_list va;
        va_start (va, n);
        if (WITH_SYMBOL(n))
            m_symbol= va_arg (va, Symbol);
        for (int i= 0; i < child_cnt(); i++)
            m_child[i]= va_arg (va, Syn*);
        va_end (va);
    }
#endif

    /* Deletes all children (recursively) and the child array. */
    ~Syn()
    {
        for (int i= 0; i < child_cnt(); i++)
            delete m_child[i];
        delete[] m_child;
    }
};

%}

%union {
    Symbol symbol;
    Syn   *tree;
};

/*
 * P_ : particles: from closed lexical classes
 * S_ : special particle: one special particle, which carries nothing but
 *      syntactic information
 * W_ : stems: from open lexical classes
 * T_ : technical: purely technical tokens for implementing the parser
 *
 */

/* Lower priority linkers so that
 *     a & b & c REL x REL Y
 * is parsed as
 *     (a & b & ((c REL x) REL Y))
 */
%right P_COORD_CLAUSE_LOW P_SUBORD_CLAUSE_LOW

/* Constructors: */
%right S_RES_VERB S_ABSTRACT_VERB S_REL S_RES_NOUN S_ABSTRACT_NOUN

/* Higher priority linkers so that
 *     a & b & c REL x REL Y
 * is parsed as
 *     (((a & b & c) REL x) REL Y)
 *
 * Subord might be removed since it implements a binary relation while
 * the lang should actually be free of anything but unary predicates.
*/ %right P_COORD_CLAUSE_HIGH P_SUBORD_CLAUSE_HIGH /* Other lexicon entries: */ %token W_VERB %token W_NOUN %token W_CONSTR %token P_ADVERB %token P_MOOD %token P_GAP_MOOD %token P_SPEECH_ACT %token P_SUFFIX %token P_SUFFIX_MOD1 %token P_COORD_ABS %token S_DERIV /* End of file: */ %token T_EOF %type sentence %type sentence_list %type adverb_mood %type adverb %type adverb_list %type clause %type clause_seq %type suffix %type constr_deriv %type constr_pe %type verb_deriv %type verb %type verb_coord %type noun_deriv %type noun %type noun_coord %type noun_mood %type noun_mood_coord %start text %% text: sentence_list T_EOF { YYACCEPT; } ; sentence_list: sentence { $$= $1; } | sentence_list sentence { $$= new Syn (SYN_LEFT_LIST, $1, $2); } ; sentence: P_SPEECH_ACT clause_seq { $$= new Syn (SYN_SENTENCE, $1, $2); } ; adverb_mood: P_GAP_MOOD verb_coord { $$= new Syn (SYN_ADVERB_MOOD_GAP, $1, $2); } | noun_mood_coord verb_coord { $$= new Syn (SYN_ADVERB_MOOD_FULL, $1, $2); } ; adverb: P_ADVERB { $$= new Syn (SYN_ADVERB_LEX, $1); } | verb_coord { $$= new Syn (SYN_ADVERB_GAP, $1); } | noun_coord verb_coord { $$= new Syn (SYN_ADVERB_FULL, $1, $2); } ; adverb_list: adverb { $$= $1; } | adverb_list adverb { $$= new Syn (SYN_LEFT_LIST, $1, $2); } ; clause: adverb_mood { $$= new Syn (SYN_CLAUSE, $1, NULL); } | adverb_mood adverb_list { $$= new Syn (SYN_CLAUSE, $1, $2); } ; clause_seq: clause { $$= $1; } | clause P_COORD_CLAUSE_LOW clause_seq { $$= new Syn (SYN_RIGHT_COORD, $2, $1, $3); } | clause P_SUBORD_CLAUSE_LOW clause_seq { $$= new Syn (SYN_RIGHT_SUBORD, $2, $1, $3); } | clause P_COORD_CLAUSE_HIGH clause_seq { $$= new Syn (SYN_RIGHT_COORD, $2, $1, $3); } | clause P_SUBORD_CLAUSE_HIGH clause_seq { $$= new Syn (SYN_RIGHT_SUBORD, $2, $1, $3); } ; suffix: P_SUFFIX { $$= new Syn (SYN_SUFFIX, $1); } | suffix P_SUFFIX_MOD1 { $$= new Syn (SYN_SUFFIX_MOD1, $2, $1); } ; constr_deriv: W_CONSTR { $$= new Syn (SYN_CONSTR_LEX, $1); } | constr_deriv suffix { $$= new Syn (SYN_CONSTR_SUFFIX, 
$1, $2); } | constr_deriv S_DERIV W_VERB { $$= new Syn (SYN_CONSTR_DERIV, $3, $1); } ; constr_pe: /*nix*/ { $$= NULL; } | constr_deriv { $$= new Syn (SYN_CONSTR, $1); } ; /* verb: */ verb_deriv: W_VERB { $$= new Syn (SYN_VERB_LEX, $1); } | verb_deriv suffix { $$= new Syn (SYN_VERB_SUFFIX, $1, $2); } | verb_deriv S_DERIV W_VERB { $$= new Syn (SYN_VERB_DERIV, $3, $1); } ; verb: verb_deriv constr_pe { $$= new Syn (SYN_VERB_SIMPLE, $1, $2); } | clause_seq S_REL verb_deriv constr_pe { $$= new Syn (SYN_VERB_EHRC, $3, $1, $4); } | clause_seq S_RES_VERB constr_pe { $$= new Syn (SYN_VERB_IHRC, $1, $3); } | clause_seq S_ABSTRACT_VERB constr_pe { $$= new Syn (SYN_VERB_CLAUSE, $1, $3); } ; verb_coord: verb { $$= $1; } | verb_coord P_COORD_ABS verb { $$= new Syn (SYN_LEFT_COORD, $2, $1, $3); } /* Might be a different coordination due to different word boundary * compared to noun coordination. */ ; /* noun: */ noun_deriv: W_NOUN { $$= new Syn (SYN_NOUN_LEX, $1); } | noun_deriv suffix { $$= new Syn (SYN_NOUN_SUFFIX, $1, $2); } | noun_deriv S_DERIV W_VERB { $$= new Syn (SYN_NOUN_DERIV, $3, $1); } ; noun: noun_deriv constr_pe { $$= new Syn (SYN_NOUN_SIMPLE, $1, $2); } | clause_seq S_REL noun_deriv constr_pe { $$= new Syn (SYN_NOUN_EHRC, $3, $1, $4); } | clause_seq S_RES_NOUN constr_pe { $$= new Syn (SYN_NOUN_IHRC, $1, $3); } | clause_seq S_ABSTRACT_NOUN constr_pe { $$= new Syn (SYN_NOUN_CLAUSE, $1, $3); } ; noun_coord: noun { $$= $1; } | noun_coord noun { $$= new Syn (SYN_LEFT_LIST, $1, $2); } | noun_coord P_COORD_ABS noun { $$= new Syn (SYN_LEFT_COORD, $2, $1, $3); } ; /* noun_mood: */ noun_mood: noun P_MOOD { $$= new Syn (SYN_NOUN_MOOD, $2, $1); } ; noun_mood_coord: noun_mood { $$= $1; } | noun_mood_coord noun { $$= new Syn (SYN_LEFT_LIST, $1, $2); } | noun_mood_coord P_COORD_ABS noun { $$= new Syn (SYN_LEFT_COORD, $2, $1, $3); } ;