You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
274 lines
11 KiB
274 lines
11 KiB
/*
|
|
* Copyright (C) 2018 The Android Open Source Project
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
|
|
#include "annotator/datetime/datetime-grounder.h"
|
|
|
|
#include <limits>
|
|
#include <unordered_map>
|
|
#include <vector>
|
|
|
|
#include "annotator/datetime/datetime_generated.h"
|
|
#include "annotator/datetime/utils.h"
|
|
#include "annotator/types.h"
|
|
#include "utils/base/integral_types.h"
|
|
#include "utils/base/status.h"
|
|
#include "utils/base/status_macros.h"
|
|
|
|
using ::libtextclassifier3::grammar::datetime::AbsoluteDateTime;
|
|
using ::libtextclassifier3::grammar::datetime::ComponentType;
|
|
using ::libtextclassifier3::grammar::datetime::Meridiem;
|
|
using ::libtextclassifier3::grammar::datetime::RelativeDateTime;
|
|
using ::libtextclassifier3::grammar::datetime::RelativeDatetimeComponent;
|
|
using ::libtextclassifier3::grammar::datetime::UngroundedDatetime;
|
|
using ::libtextclassifier3::grammar::datetime::RelativeDatetimeComponent_::
|
|
Modifier;
|
|
|
|
namespace libtextclassifier3 {
|
|
|
|
namespace {
|
|
|
|
const std::unordered_map<int, int> kMonthDefaultLastDayMap(
|
|
{{/*no_month*/ 0, 31},
|
|
{/*January*/ 1, 31},
|
|
{/*Febuary*/ 2, 29},
|
|
{/*March*/ 3, 31},
|
|
{/*April*/ 4, 30},
|
|
{/*May*/ 5, 31},
|
|
{/*June*/ 6, 30},
|
|
{/*July*/ 7, 31},
|
|
{/*August*/ 8, 31},
|
|
{/*September*/ 9, 30},
|
|
{/*October*/ 10, 31},
|
|
{/*November*/ 11, 30},
|
|
{/*December*/ 12, 31}});
|
|
|
|
bool IsValidDatetime(const AbsoluteDateTime* absolute_datetime) {
|
|
// Sanity Checks.
|
|
if (absolute_datetime->minute() > 59 || absolute_datetime->second() > 59 ||
|
|
absolute_datetime->hour() > 23 || absolute_datetime->month() > 12 ||
|
|
absolute_datetime->month() == 0) {
|
|
return false;
|
|
}
|
|
if (absolute_datetime->day() >= 0) {
|
|
int min_day_value = 1;
|
|
int max_day_value = 31;
|
|
if (absolute_datetime->month() >= 0 && absolute_datetime->month() <= 12) {
|
|
max_day_value = kMonthDefaultLastDayMap.at(absolute_datetime->month());
|
|
if (absolute_datetime->day() < min_day_value ||
|
|
absolute_datetime->day() > max_day_value) {
|
|
return false;
|
|
}
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
|
|
bool IsValidDatetime(const RelativeDateTime* relative_datetime) {
|
|
if (relative_datetime->base()) {
|
|
return IsValidDatetime(relative_datetime->base());
|
|
}
|
|
return true;
|
|
}
|
|
|
|
StatusOr<DatetimeComponent::RelativeQualifier> ToRelativeQualifier(
|
|
const Modifier& modifier) {
|
|
switch (modifier) {
|
|
case Modifier::Modifier_THIS:
|
|
return DatetimeComponent::RelativeQualifier::THIS;
|
|
case Modifier::Modifier_LAST:
|
|
return DatetimeComponent::RelativeQualifier::LAST;
|
|
case Modifier::Modifier_NEXT:
|
|
return DatetimeComponent::RelativeQualifier::NEXT;
|
|
case Modifier::Modifier_NOW:
|
|
return DatetimeComponent::RelativeQualifier::NOW;
|
|
case Modifier::Modifier_TOMORROW:
|
|
return DatetimeComponent::RelativeQualifier::TOMORROW;
|
|
case Modifier::Modifier_YESTERDAY:
|
|
return DatetimeComponent::RelativeQualifier::YESTERDAY;
|
|
case Modifier::Modifier_PAST:
|
|
return DatetimeComponent::RelativeQualifier::PAST;
|
|
case Modifier::Modifier_FUTURE:
|
|
return DatetimeComponent::RelativeQualifier::FUTURE;
|
|
case Modifier::Modifier_UNSPECIFIED:
|
|
return DatetimeComponent::RelativeQualifier::UNSPECIFIED;
|
|
default:
|
|
return Status(StatusCode::INTERNAL,
|
|
"Couldn't parse the Modifier to RelativeQualifier.");
|
|
}
|
|
}
|
|
|
|
StatusOr<DatetimeComponent::ComponentType> ToComponentType(
|
|
const grammar::datetime::ComponentType component_type) {
|
|
switch (component_type) {
|
|
case grammar::datetime::ComponentType_YEAR:
|
|
return DatetimeComponent::ComponentType::YEAR;
|
|
case grammar::datetime::ComponentType_MONTH:
|
|
return DatetimeComponent::ComponentType::MONTH;
|
|
case grammar::datetime::ComponentType_WEEK:
|
|
return DatetimeComponent::ComponentType::WEEK;
|
|
case grammar::datetime::ComponentType_DAY_OF_WEEK:
|
|
return DatetimeComponent::ComponentType::DAY_OF_WEEK;
|
|
case grammar::datetime::ComponentType_DAY_OF_MONTH:
|
|
return DatetimeComponent::ComponentType::DAY_OF_MONTH;
|
|
case grammar::datetime::ComponentType_HOUR:
|
|
return DatetimeComponent::ComponentType::HOUR;
|
|
case grammar::datetime::ComponentType_MINUTE:
|
|
return DatetimeComponent::ComponentType::MINUTE;
|
|
case grammar::datetime::ComponentType_SECOND:
|
|
return DatetimeComponent::ComponentType::SECOND;
|
|
case grammar::datetime::ComponentType_MERIDIEM:
|
|
return DatetimeComponent::ComponentType::MERIDIEM;
|
|
case grammar::datetime::ComponentType_UNSPECIFIED:
|
|
return DatetimeComponent::ComponentType::UNSPECIFIED;
|
|
default:
|
|
return Status(StatusCode::INTERNAL,
|
|
"Couldn't parse the DatetimeComponent's ComponentType from "
|
|
"grammar's datetime ComponentType.");
|
|
}
|
|
}
|
|
|
|
void FillAbsoluteDateTimeComponents(
|
|
const grammar::datetime::AbsoluteDateTime* absolute_datetime,
|
|
DatetimeParsedData* datetime_parsed_data) {
|
|
if (absolute_datetime->year() >= 0) {
|
|
datetime_parsed_data->SetAbsoluteValue(
|
|
DatetimeComponent::ComponentType::YEAR,
|
|
GetAdjustedYear(absolute_datetime->year()));
|
|
}
|
|
if (absolute_datetime->month() >= 0) {
|
|
datetime_parsed_data->SetAbsoluteValue(
|
|
DatetimeComponent::ComponentType::MONTH, absolute_datetime->month());
|
|
}
|
|
if (absolute_datetime->day() >= 0) {
|
|
datetime_parsed_data->SetAbsoluteValue(
|
|
DatetimeComponent::ComponentType::DAY_OF_MONTH,
|
|
absolute_datetime->day());
|
|
}
|
|
if (absolute_datetime->week_day() >= 0) {
|
|
datetime_parsed_data->SetAbsoluteValue(
|
|
DatetimeComponent::ComponentType::DAY_OF_WEEK,
|
|
absolute_datetime->week_day());
|
|
}
|
|
if (absolute_datetime->hour() >= 0) {
|
|
datetime_parsed_data->SetAbsoluteValue(
|
|
DatetimeComponent::ComponentType::HOUR, absolute_datetime->hour());
|
|
}
|
|
if (absolute_datetime->minute() >= 0) {
|
|
datetime_parsed_data->SetAbsoluteValue(
|
|
DatetimeComponent::ComponentType::MINUTE, absolute_datetime->minute());
|
|
}
|
|
if (absolute_datetime->second() >= 0) {
|
|
datetime_parsed_data->SetAbsoluteValue(
|
|
DatetimeComponent::ComponentType::SECOND, absolute_datetime->second());
|
|
}
|
|
if (absolute_datetime->meridiem() != grammar::datetime::Meridiem_UNKNOWN) {
|
|
datetime_parsed_data->SetAbsoluteValue(
|
|
DatetimeComponent::ComponentType::MERIDIEM,
|
|
absolute_datetime->meridiem() == grammar::datetime::Meridiem_AM ? 0
|
|
: 1);
|
|
}
|
|
if (absolute_datetime->time_zone()) {
|
|
datetime_parsed_data->SetAbsoluteValue(
|
|
DatetimeComponent::ComponentType::ZONE_OFFSET,
|
|
absolute_datetime->time_zone()->utc_offset_mins());
|
|
}
|
|
}
|
|
|
|
StatusOr<DatetimeParsedData> FillRelativeDateTimeComponents(
|
|
const grammar::datetime::RelativeDateTime* relative_datetime) {
|
|
DatetimeParsedData datetime_parsed_data;
|
|
for (const RelativeDatetimeComponent* relative_component :
|
|
*relative_datetime->relative_datetime_component()) {
|
|
TC3_ASSIGN_OR_RETURN(const DatetimeComponent::ComponentType component_type,
|
|
ToComponentType(relative_component->component_type()));
|
|
datetime_parsed_data.SetRelativeCount(component_type,
|
|
relative_component->value());
|
|
TC3_ASSIGN_OR_RETURN(
|
|
const DatetimeComponent::RelativeQualifier relative_qualifier,
|
|
ToRelativeQualifier(relative_component->modifier()));
|
|
datetime_parsed_data.SetRelativeValue(component_type, relative_qualifier);
|
|
}
|
|
if (relative_datetime->base()) {
|
|
FillAbsoluteDateTimeComponents(relative_datetime->base(),
|
|
&datetime_parsed_data);
|
|
}
|
|
return datetime_parsed_data;
|
|
}
|
|
|
|
} // namespace
|
|
|
|
DatetimeGrounder::DatetimeGrounder(const CalendarLib* calendarlib)
|
|
: calendarlib_(*calendarlib) {}
|
|
|
|
StatusOr<std::vector<DatetimeParseResult>> DatetimeGrounder::Ground(
|
|
const int64 reference_time_ms_utc, const std::string& reference_timezone,
|
|
const std::string& reference_locale,
|
|
const grammar::datetime::UngroundedDatetime* ungrounded_datetime) const {
|
|
DatetimeParsedData datetime_parsed_data;
|
|
if (ungrounded_datetime->absolute_datetime()) {
|
|
FillAbsoluteDateTimeComponents(ungrounded_datetime->absolute_datetime(),
|
|
&datetime_parsed_data);
|
|
} else if (ungrounded_datetime->relative_datetime()) {
|
|
TC3_ASSIGN_OR_RETURN(datetime_parsed_data,
|
|
FillRelativeDateTimeComponents(
|
|
ungrounded_datetime->relative_datetime()));
|
|
}
|
|
std::vector<DatetimeParsedData> interpretations;
|
|
FillInterpretations(datetime_parsed_data,
|
|
calendarlib_.GetGranularity(datetime_parsed_data),
|
|
&interpretations);
|
|
std::vector<DatetimeParseResult> datetime_parse_result;
|
|
|
|
for (const DatetimeParsedData& interpretation : interpretations) {
|
|
std::vector<DatetimeComponent> date_components;
|
|
interpretation.GetDatetimeComponents(&date_components);
|
|
DatetimeParseResult result;
|
|
// Text classifier only provides ambiguity limited to “AM/PM” which is
|
|
// encoded in the pair of DatetimeParseResult; both corresponding to the
|
|
// same date, but one corresponding to “AM” and the other one corresponding
|
|
// to “PM”.
|
|
if (!calendarlib_.InterpretParseData(
|
|
interpretation, reference_time_ms_utc, reference_timezone,
|
|
reference_locale, /*prefer_future_for_unspecified_date=*/true,
|
|
&(result.time_ms_utc), &(result.granularity))) {
|
|
return Status(
|
|
StatusCode::INTERNAL,
|
|
"Couldn't parse the UngroundedDatetime to DatetimeParseResult.");
|
|
}
|
|
|
|
// Sort the date time units by component type.
|
|
std::sort(date_components.begin(), date_components.end(),
|
|
[](DatetimeComponent a, DatetimeComponent b) {
|
|
return a.component_type > b.component_type;
|
|
});
|
|
result.datetime_components.swap(date_components);
|
|
datetime_parse_result.push_back(result);
|
|
}
|
|
return datetime_parse_result;
|
|
}
|
|
|
|
bool DatetimeGrounder::IsValidUngroundedDatetime(
|
|
const UngroundedDatetime* ungrounded_datetime) const {
|
|
if (ungrounded_datetime->absolute_datetime()) {
|
|
return IsValidDatetime(ungrounded_datetime->absolute_datetime());
|
|
} else if (ungrounded_datetime->relative_datetime()) {
|
|
return IsValidDatetime(ungrounded_datetime->relative_datetime());
|
|
}
|
|
return false;
|
|
}
|
|
|
|
} // namespace libtextclassifier3
|