Add proto files for language API.
This commit is contained in:
parent
3748d006c4
commit
1e645debc6
|
|
@ -0,0 +1,610 @@
|
|||
// Copyright 2016 Google Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
syntax = "proto3";
|
||||
|
||||
package google.cloud.language.v1beta1;
|
||||
|
||||
import "google/api/annotations.proto";
|
||||
|
||||
option java_multiple_files = true;
|
||||
option java_outer_classname = "LanguageServiceProto";
|
||||
option java_package = "com.google.cloud.language.v1beta1";
|
||||
|
||||
|
||||
// Provides text analysis operations such as sentiment analysis and entity
// recognition.
service LanguageService {
  // Analyzes the sentiment of the provided text.
  rpc AnalyzeSentiment(AnalyzeSentimentRequest)
      returns (AnalyzeSentimentResponse) {
    option (google.api.http) = {
      post: "/v1beta1/documents:analyzeSentiment"
      body: "*"
    };
  }

  // Finds named entities (currently proper names) in the text and returns
  // them along with entity types, salience, mentions for each entity, and
  // other properties.
  rpc AnalyzeEntities(AnalyzeEntitiesRequest)
      returns (AnalyzeEntitiesResponse) {
    option (google.api.http) = {
      post: "/v1beta1/documents:analyzeEntities"
      body: "*"
    };
  }

  // Advanced API that analyzes the document and provides a full set of text
  // annotations, including semantic, syntactic, and sentiment information.
  // This API is intended for users who are familiar with machine learning
  // and need in-depth text features to build upon.
  rpc AnnotateText(AnnotateTextRequest) returns (AnnotateTextResponse) {
    option (google.api.http) = {
      post: "/v1beta1/documents:annotateText"
      body: "*"
    };
  }
}
|
||||
|
||||
// ################################################################ #

// Represents the input to API methods.
message Document {
  // The document types enum.
  enum Type {
    // The content type is not specified.
    TYPE_UNSPECIFIED = 0;

    // Plain text
    PLAIN_TEXT = 1;

    // HTML
    HTML = 2;
  }

  // Required. If the type is not set or is `TYPE_UNSPECIFIED`,
  // returns an `INVALID_ARGUMENT` error.
  Type type = 1;

  // The source of the document: a string containing the content or a
  // Google Cloud Storage URI.
  oneof source {
    // The content of the input in string format.
    string content = 2;

    // The Google Cloud Storage URI where the file content is located.
    string gcs_content_uri = 3;
  }

  // The language of the document (if not specified, the language is
  // automatically detected). Both ISO and BCP-47 language codes are
  // accepted.
  //
  // **Current Language Restrictions:**
  //
  // * Only English, Spanish, and Japanese textual content are supported,
  //   with the following additional restriction:
  //   * `analyzeSentiment` only supports English text.
  //
  // If the language (either specified by the caller or automatically
  // detected) is not supported by the called API method, an
  // `INVALID_ARGUMENT` error is returned.
  string language = 4;
}
|
||||
|
||||
// A single sentence of the input document.
message Sentence {
  // Text of the sentence.
  TextSpan text = 1;
}
|
||||
|
||||
// A phrase in the text that is a known entity, such as a person, an
// organization, or a location. The API associates information such as
// salience and mentions with each entity.
message Entity {
  // Category of the entity.
  enum Type {
    // Unknown
    UNKNOWN = 0;

    // Person
    PERSON = 1;

    // Location
    LOCATION = 2;

    // Organization
    ORGANIZATION = 3;

    // Event
    EVENT = 4;

    // Work of art
    WORK_OF_ART = 5;

    // Consumer goods
    CONSUMER_GOOD = 6;

    // Other types
    OTHER = 7;
  }

  // Representative name of the entity.
  string name = 1;

  // Type of the entity.
  Type type = 2;

  // Metadata associated with the entity.
  //
  // Currently only Wikipedia URLs are provided, when available, under the
  // key "wikipedia_url".
  map<string, string> metadata = 3;

  // Salience score of the entity, in the [0, 1.0] range.
  //
  // Salience conveys how important or central the entity is to the entire
  // document text. Scores closer to 0 are less salient; scores closer to
  // 1.0 are highly salient.
  float salience = 4;

  // Mentions of this entity in the input document. The API currently
  // supports proper noun mentions.
  repeated EntityMention mentions = 5;
}
|
||||
|
||||
// The smallest syntactic building block of the text.
message Token {
  // Text of the token.
  TextSpan text = 1;

  // Part of speech tag for this token.
  PartOfSpeech part_of_speech = 2;

  // Dependency tree parse for this token.
  DependencyEdge dependency_edge = 3;

  // [Lemma](https://en.wikipedia.org/wiki/Lemma_(morphology)) of the token.
  string lemma = 4;
}
|
||||
|
||||
// The feeling associated with the entire text or with entities in the text.
message Sentiment {
  // Polarity of the sentiment, in the [-1.0, 1.0] range. Larger values
  // represent more positive sentiments.
  float polarity = 1;

  // Absolute magnitude of the sentiment regardless of polarity (positive or
  // negative): a non-negative number in the [0, +inf) range.
  float magnitude = 2;
}
|
||||
|
||||
// Part of speech information for a token.
message PartOfSpeech {
  // Enumeration of the part of speech tags.
  enum Tag {
    // Unknown
    UNKNOWN = 0;

    // Adjective
    ADJ = 1;

    // Adposition (preposition and postposition)
    ADP = 2;

    // Adverb
    ADV = 3;

    // Conjunction
    CONJ = 4;

    // Determiner
    DET = 5;

    // Noun (common and proper)
    NOUN = 6;

    // Cardinal number
    NUM = 7;

    // Pronoun
    PRON = 8;

    // Particle or other function word
    PRT = 9;

    // Punctuation
    PUNCT = 10;

    // Verb (all tenses and modes)
    VERB = 11;

    // Other: foreign words, typos, abbreviations
    X = 12;

    // Affix
    AFFIX = 13;
  }

  // Part of speech tag of the token.
  Tag tag = 1;
}
|
||||
|
||||
// Represents dependency parse tree information for a token.
message DependencyEdge {
  // The parse label enum for the token.
  enum Label {
    // Unknown
    UNKNOWN = 0;

    // Abbreviation modifier
    ABBREV = 1;

    // Adjectival complement
    ACOMP = 2;

    // Adverbial clause modifier
    ADVCL = 3;

    // Adverbial modifier
    ADVMOD = 4;

    // Adjectival modifier of an NP
    AMOD = 5;

    // Appositional modifier of an NP
    APPOS = 6;

    // Attribute dependent of a copular verb
    ATTR = 7;

    // Auxiliary (non-main) verb
    AUX = 8;

    // Passive auxiliary
    AUXPASS = 9;

    // Coordinating conjunction
    CC = 10;

    // Clausal complement of a verb or adjective
    CCOMP = 11;

    // Conjunct
    CONJ = 12;

    // Clausal subject
    CSUBJ = 13;

    // Clausal passive subject
    CSUBJPASS = 14;

    // Dependency (unable to determine)
    DEP = 15;

    // Determiner
    DET = 16;

    // Discourse
    DISCOURSE = 17;

    // Direct object
    DOBJ = 18;

    // Expletive
    EXPL = 19;

    // Goes with (part of a word in a text not well edited)
    GOESWITH = 20;

    // Indirect object
    IOBJ = 21;

    // Marker (word introducing a subordinate clause)
    MARK = 22;

    // Multi-word expression
    MWE = 23;

    // Multi-word verbal expression
    MWV = 24;

    // Negation modifier
    NEG = 25;

    // Noun compound modifier
    NN = 26;

    // Noun phrase used as an adverbial modifier
    NPADVMOD = 27;

    // Nominal subject
    NSUBJ = 28;

    // Passive nominal subject
    NSUBJPASS = 29;

    // Numeric modifier of a noun
    NUM = 30;

    // Element of compound number
    NUMBER = 31;

    // Punctuation mark
    P = 32;

    // Parataxis relation
    PARATAXIS = 33;

    // Participial modifier
    PARTMOD = 34;

    // The complement of a preposition is a clause
    PCOMP = 35;

    // Object of a preposition
    POBJ = 36;

    // Possession modifier
    POSS = 37;

    // Postverbal negative particle
    POSTNEG = 38;

    // Predicate complement
    PRECOMP = 39;

    // Preconjunct
    PRECONJ = 40;

    // Predeterminer
    PREDET = 41;

    // Prefix
    PREF = 42;

    // Prepositional modifier
    PREP = 43;

    // The relationship between a verb and verbal morpheme
    PRONL = 44;

    // Particle
    PRT = 45;

    // Associative or possessive marker
    PS = 46;

    // Quantifier phrase modifier
    QUANTMOD = 47;

    // Relative clause modifier
    RCMOD = 48;

    // Complementizer in relative clause
    RCMODREL = 49;

    // Ellipsis without a preceding predicate
    RDROP = 50;

    // Referent
    REF = 51;

    // Remnant
    REMNANT = 52;

    // Reparandum
    REPARANDUM = 53;

    // Root
    ROOT = 54;

    // Suffix specifying a unit of number
    SNUM = 55;

    // Suffix
    SUFF = 56;

    // Temporal modifier
    TMOD = 57;

    // Topic marker
    TOPIC = 58;

    // Clause headed by an infinite form of the verb that modifies a noun
    VMOD = 59;

    // Vocative
    VOCATIVE = 60;

    // Open clausal complement
    XCOMP = 61;

    // Name suffix
    SUFFIX = 62;

    // Name title
    TITLE = 63;

    // Adverbial phrase modifier
    ADVPHMOD = 64;

    // Causative auxiliary
    AUXCAUS = 65;

    // Helper auxiliary
    AUXVV = 66;

    // Rentaishi (Prenominal modifier)
    DTMOD = 67;

    // Foreign words
    FOREIGN = 68;

    // Keyword
    KW = 69;

    // List for chains of comparable items
    LIST = 70;

    // Nominalized clause
    NOMC = 71;

    // Nominalized clausal subject
    NOMCSUBJ = 72;

    // Nominalized clausal passive
    NOMCSUBJPASS = 73;

    // Compound of numeric modifier
    NUMC = 74;

    // Copula
    COP = 75;

    // Dislocated relation (for fronted/topicalized elements)
    DISLOCATED = 76;
  }

  // Represents the head of this token in the dependency tree.
  // This is the index of the token which has an arc going to this token.
  // The index is the position of the token in the array of tokens returned
  // by the API method. If this token is a root token, then the
  // `head_token_index` is its own index.
  int32 head_token_index = 1;

  // The parse label for the token.
  Label label = 2;
}
|
||||
|
||||
// A mention of an entity in the text. Proper noun mentions are currently
// supported.
message EntityMention {
  // Text of the mention.
  TextSpan text = 1;
}
|
||||
|
||||
// A piece of text in the output.
message TextSpan {
  // Content of the output text.
  string content = 1;

  // Beginning offset of the content in the original document, calculated by
  // the API according to the
  // [EncodingType][google.cloud.language.v1beta1.EncodingType] specified in
  // the API request.
  int32 begin_offset = 2;
}
|
||||
|
||||
// Request message for sentiment analysis.
message AnalyzeSentimentRequest {
  // The document to analyze. `analyzeSentiment` currently supports only
  // English text
  // ([Document.language][google.cloud.language.v1beta1.Document.language]="EN").
  Document document = 1;
}
|
||||
|
||||
// Response message for sentiment analysis.
message AnalyzeSentimentResponse {
  // Overall sentiment of the input document.
  Sentiment document_sentiment = 1;

  // Language of the text: the language specified in the request or, when
  // none was specified, the automatically-detected language.
  string language = 2;
}
|
||||
|
||||
// Request message for entity analysis.
message AnalyzeEntitiesRequest {
  // The document to analyze.
  Document document = 1;

  // Encoding type the API uses to calculate offsets.
  EncodingType encoding_type = 2;
}
|
||||
|
||||
// Response message for entity analysis.
message AnalyzeEntitiesResponse {
  // Entities recognized in the input document.
  repeated Entity entities = 1;

  // Language of the text: the language specified in the request or, when
  // none was specified, the automatically-detected language.
  string language = 2;
}
|
||||
|
||||
// Request message for the advanced text annotation API, which performs
// sentiment and entity analysis plus syntactic analysis.
message AnnotateTextRequest {
  // All available features for sentiment, syntax, and semantic analysis.
  // Setting a flag to true enables that specific analysis for the input.
  message Features {
    // Extract syntax information.
    bool extract_syntax = 1;

    // Extract entities.
    bool extract_entities = 2;

    // Extract document-level sentiment.
    bool extract_document_sentiment = 3;
  }

  // The document to analyze.
  Document document = 1;

  // Features to enable.
  Features features = 2;

  // Encoding type the API uses to calculate offsets.
  EncodingType encoding_type = 3;
}
|
||||
|
||||
// Response message for text annotation.
message AnnotateTextResponse {
  // Sentences in the input document. Populated if the user enables
  // [AnnotateTextRequest.Features.extract_syntax][google.cloud.language.v1beta1.AnnotateTextRequest.Features.extract_syntax].
  repeated Sentence sentences = 1;

  // Tokens in the input document, together with their syntactic
  // information. Populated if the user enables
  // [AnnotateTextRequest.Features.extract_syntax][google.cloud.language.v1beta1.AnnotateTextRequest.Features.extract_syntax].
  repeated Token tokens = 2;

  // Entities in the input document, together with their semantic
  // information. Populated if the user enables
  // [AnnotateTextRequest.Features.extract_entities][google.cloud.language.v1beta1.AnnotateTextRequest.Features.extract_entities].
  repeated Entity entities = 3;

  // Overall sentiment of the document. Populated if the user enables
  // [AnnotateTextRequest.Features.extract_document_sentiment][google.cloud.language.v1beta1.AnnotateTextRequest.Features.extract_document_sentiment].
  Sentiment document_sentiment = 4;

  // Language of the text: the language specified in the request or, when
  // none was specified, the automatically-detected language.
  string language = 5;
}
|
||||
|
||||
// The text encoding that the caller uses to process the output. Supplying
// an `EncodingType` is recommended: the API provides beginning offsets for
// various outputs, such as tokens and mentions, and languages that natively
// use different text encodings may access offsets differently.
enum EncodingType {
  // When no `EncodingType` is specified, encoding-dependent information
  // (such as `begin_offset`) is set to `-1`.
  NONE = 0;

  // Encoding-dependent information (such as `begin_offset`) is computed
  // from the UTF-8 encoding of the input. C++ and Go are examples of
  // languages that use this encoding natively.
  UTF8 = 1;

  // Encoding-dependent information (such as `begin_offset`) is computed
  // from the UTF-16 encoding of the input. Java and JavaScript are examples
  // of languages that use this encoding natively.
  UTF16 = 2;

  // Encoding-dependent information (such as `begin_offset`) is computed
  // from the UTF-32 encoding of the input. Python is an example of a
  // language that uses this encoding natively.
  UTF32 = 3;
}
|
||||
Loading…
Reference in New Issue